Mercurial > emacs
annotate lisp/emacs-lisp/rx.el @ 54736:b94de166de9d
(ethio-sera-being-called-by-w3): New
variable.
(ethio-sera-to-fidel-ethio): Check ethio-sera-being-called-by-w3
instead of sera-being-called-by-w3.
(ethio-fidel-to-sera-buffer): Likewise.
(ethio-find-file): Bind ethio-sera-being-called-by-w3 to t
instead of sera-being-called-by-w3.
(ethio-write-file): Likewise.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Mon, 05 Apr 2004 23:27:37 +0000 |
| parents | d2d20534e329 |
| children | 95c1c6487fda |
| rev | line source |
|---|---|
| 39516 | 1 ;;; rx.el --- sexp notation for regular expressions |
| 2 | |
|
54601
d2d20534e329
(rx): Work at compile time, not run time.
Eli Zaretskii <eliz@gnu.org>
parents:
54503
diff
changeset
|
3 ;; Copyright (C) 2001, 2003, 2004 Free Software Foundation, Inc. |
| 39516 | 4 |
| 5 ;; Author: Gerd Moellmann <gerd@gnu.org> | |
| 6 ;; Maintainer: FSF | |
| 7 ;; Keywords: strings, regexps, extensions | |
| 8 | |
| 9 ;; This file is part of GNU Emacs. | |
| 10 | |
| 11 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
| 12 ;; it under the terms of the GNU General Public License as published by | |
| 13 ;; the Free Software Foundation; either version 2, or (at your option) | |
| 14 ;; any later version. | |
| 15 | |
| 16 ;; GNU Emacs is distributed in the hope that it will be useful, | |
| 17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 19 ;; GNU General Public License for more details. | |
| 20 | |
| 21 ;; You should have received a copy of the GNU General Public License | |
| 22 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
| 23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
| 24 ;; Boston, MA 02111-1307, USA. | |
| 25 | |
| 26 ;;; Commentary: | |
| 27 | |
| 28 ;; This is another implementation of sexp-form regular expressions. | |
| 29 ;; It was unfortunately written without being aware of the Sregex | |
| 30 ;; package coming with Emacs, but as things stand, Rx completely | |
| 31 ;; covers all regexp features, which Sregex doesn't, doesn't suffer | |
| 32 ;; from the bugs mentioned in the commentary section of Sregex, and | |
| 33 ;; uses a nicer syntax (IMHO, of course :-). | |
| 34 | |
| 35 ;; Rx translates a sexp notation for regular expressions into the | |
| 36 ;; usual string notation. The translation can be done at compile-time | |
| 37 ;; by using the `rx' macro. It can be done at run-time by calling | |
| 38 ;; function `rx-to-string'. See the documentation of `rx' for a | |
| 39 ;; complete description of the sexp notation. | |
| 40 ;; | |
| 41 ;; Some examples of string regexps and their sexp counterparts: | |
| 42 ;; | |
| 43 ;; "^[a-z]*" | |
| 44 ;; (rx (and line-start (0+ (in "a-z")))) | |
| 45 ;; | |
| 46 ;; "\n[^ \t]" | |
| 47 ;; (rx (and "\n" (not blank))), or | |
| 48 ;; (rx (and "\n" (not (any " \t")))) | |
| 49 ;; | |
| 50 ;; "\\*\\*\\* EOOH \\*\\*\\*\n" | |
| 51 ;; (rx "*** EOOH ***\n") | |
| 52 ;; | |
| 53 ;; "\\<\\(catch\\|finally\\)\\>[^_]" | |
| 54 ;; (rx (and word-start (submatch (or "catch" "finally")) word-end | |
| 55 ;; (not (any ?_)))) | |
| 56 ;; | |
| 57 ;; "[ \t\n]*:\\([^:]+\\|$\\)" | |
| 58 ;; (rx (and (zero-or-more (in " \t\n")) ":" | |
| 59 ;; (submatch (or line-end (one-or-more (not (any ?:))))))) | |
| 60 ;; | |
| 61 ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" | |
| 62 ;; (rx (and line-start | |
| 63 ;; "content-transfer-encoding:" | |
|
48938
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
64 ;; (+ (? ?\n)) blank |
| 39516 | 65 ;; "quoted-printable" |
|
48938
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
66 ;; (+ (? ?\n)) blank)) |
| 39516 | 67 ;; |
| 68 ;; (concat "^\\(?:" something-else "\\)") | |
| 69 ;; (rx (and line-start (eval something-else))), statically or | |
| 70 ;; (rx-to-string `(and line-start ,something-else)), dynamically. | |
| 71 ;; | |
| 72 ;; (regexp-opt '(STRING1 STRING2 ...)) | |
| 73 ;; (rx (or STRING1 STRING2 ...)), or in other words, `or' automatically | |
| 74 ;; calls `regexp-opt' as needed. | |
| 75 ;; | |
| 76 ;; "^;;\\s-*\n\\|^\n" | |
| 77 ;; (rx (or (and line-start ";;" (0+ space) ?\n) | |
| 78 ;; (and line-start ?\n))) | |
| 79 ;; | |
| 80 ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " | |
|
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
81 ;; (rx (and "$Id: " |
|
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
82 ;; (1+ (not (in " "))) |
| 39516 | 83 ;; " " |
| 84 ;; (submatch (1+ (not (in " ")))) | |
|
48938
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
85 ;; " ")) |
| 39516 | 86 ;; |
| 87 ;; "\\\\\\\\\\[\\w+" | |
| 88 ;; (rx (and ?\\ ?\\ ?\[ (1+ word))) | |
| 89 ;; | |
| 90 ;; etc. | |
| 91 | |
| 92 ;;; History: | |
|
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
93 ;; |
| 39516 | 94 |
| 95 ;;; Code: | |
| 96 | |
| 97 | |
(defconst rx-constituents
  '((and . (rx-and 1 nil))
    (or . (rx-or 1 nil))
    (not-newline . ".")
    ;; Wrapped in a shy group: a bare ".\\|\n" would let the alternation
    ;; swallow neighbouring elements once spliced into a sequence,
    ;; e.g. (and "a" anything "b") would become "a.\\|\nb".
    (anything . "\\(?:.\\|\n\\)")
    (any . (rx-any 1 1 rx-check-any))
    (in . any)
    (not . (rx-not 1 1 rx-check-not))
    (repeat . (rx-repeat 2 3))
    (submatch . (rx-submatch 1 nil))
    (group . submatch)
    (zero-or-more . (rx-kleene 1 1))
    (one-or-more . (rx-kleene 1 1))
    (zero-or-one . (rx-kleene 1 1))
    (\? . zero-or-one)
    (\?? . zero-or-one)
    (* . zero-or-more)
    (*? . zero-or-more)
    (0+ . zero-or-more)
    (+ . one-or-more)
    (+? . one-or-more)
    (1+ . one-or-more)
    (optional . zero-or-one)
    (minimal-match . (rx-greedy 1 1))
    (maximal-match . (rx-greedy 1 1))
    (backref . (rx-backref 1 1 rx-check-backref))
    (line-start . "^")
    (line-end . "$")
    (string-start . "\\`")
    (string-end . "\\'")
    (buffer-start . "\\`")
    (buffer-end . "\\'")
    (point . "\\=")
    (word-start . "\\<")
    (word-end . "\\>")
    (word-boundary . "\\b")
    (syntax . (rx-syntax 1 1))
    (category . (rx-category 1 1 rx-check-category))
    (eval . (rx-eval 1 1))
    (regexp . (rx-regexp 1 1 stringp))
    (digit . "[[:digit:]]")
    (control . "[[:cntrl:]]")
    (hex-digit . "[[:xdigit:]]")
    (blank . "[[:blank:]]")
    (graphic . "[[:graph:]]")
    (printing . "[[:print:]]")
    (alphanumeric . "[[:alnum:]]")
    (letter . "[[:alpha:]]")
    (ascii . "[[:ascii:]]")
    (nonascii . "[[:nonascii:]]")
    (lower . "[[:lower:]]")
    (punctuation . "[[:punct:]]")
    (space . "[[:space:]]")
    (upper . "[[:upper:]]")
    (word . "[[:word:]]"))
  "Alist of sexp form regexp constituents.
Each element of the alist has the form (SYMBOL . DEFN).
SYMBOL is a valid constituent of sexp regular expressions.
If DEFN is a string, SYMBOL is translated into DEFN.
If DEFN is a symbol, use the definition of DEFN, recursively.
Otherwise, DEFN must be a list (FUNCTION MIN-ARGS MAX-ARGS PREDICATE).
FUNCTION is used to produce code for SYMBOL.  MIN-ARGS and MAX-ARGS
are the minimum and maximum number of arguments the function-form
sexp constituent SYMBOL may have in sexp regular expressions.
MAX-ARGS nil means no limit.  PREDICATE, if specified, means that
all arguments must satisfy PREDICATE.")
| 164 | |
| 165 | |
;; Lookup table consulted by the function `rx-syntax': the symbol given
;; in a `(syntax SYMBOL)' form is mapped to the character that is
;; appended to "\\s" in the generated regexp.
(defconst rx-syntax
  '((whitespace . ?-)
    (punctuation . ?.)
    (word . ?w)
    (symbol . ?_)
    (open-parenthesis . ?\()
    (close-parenthesis . ?\))
    (expression-prefix . ?\')
    (string-quote . ?\")
    (paired-delimiter . ?$)
    (escape . ?\\)
    (character-quote . ?/)
    (comment-start . ?<)
    (comment-end . ?>)
    (string-delimiter . ?|)
    (comment-delimiter . ?!))
  "Alist mapping Rx syntax symbols to syntax characters.
Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid
symbol in `(syntax SYMBOL)', and CHAR is the syntax character
corresponding to SYMBOL, as it would be used with \\s or \\S in
regular expressions.")
| 187 | |
| 188 | |
(defconst rx-categories
  '((consonant . ?0)
    (base-vowel . ?1)
    (upper-diacritical-mark . ?2)
    (lower-diacritical-mark . ?3)
    (tone-mark . ?4)
    (symbol . ?5)
    (digit . ?6)
    (vowel-modifying-diacritical-mark . ?7)
    (vowel-sign . ?8)
    (semivowel-lower . ?9)
    (not-at-end-of-line . ?<)
    (not-at-beginning-of-line . ?>)
    (alpha-numeric-two-byte . ?A)
    (chinese-two-byte . ?C)
    ;; Misspelled name kept so that existing callers keep working.
    (chinse-two-byte . ?C)
    (greek-two-byte . ?G)
    (japanese-hiragana-two-byte . ?H)
    (indian-two-byte . ?I)
    (japanese-katakana-two-byte . ?K)
    (korean-hangul-two-byte . ?N)
    (cyrillic-two-byte . ?Y)
    (combining-diacritic . ?^)
    (ascii . ?a)
    (arabic . ?b)
    (chinese . ?c)
    (ethiopic . ?e)
    (greek . ?g)
    (korean . ?h)
    (indian . ?i)
    (japanese . ?j)
    (japanese-katakana . ?k)
    (latin . ?l)
    (lao . ?o)
    (tibetan . ?q)
    (japanese-roman . ?r)
    (thai . ?t)
    (vietnamese . ?v)
    (hebrew . ?w)
    (cyrillic . ?y)
    (can-break . ?|))
  "Alist mapping symbols to category characters.
Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid
symbol in `(category SYMBOL)', and CHAR is the category character
corresponding to SYMBOL, as it would be used with `\\c' or `\\C' in
regular expression strings.")
| 234 | |
| 235 | |
(defvar rx-greedy-flag t
  "Non-nil means produce greedy regular expressions.
This applies to the forms `zero-or-one', `zero-or-more', and
`one-or-more'.  Dynamically bound.")
| 239 | |
| 240 | |
(defun rx-info (op)
  "Return parsing/code generation info for OP.
If OP is the space character ASCII 32, return info for the symbol `?'.
If OP is the character `?', return info for the symbol `??'.
Symbol definitions in `rx-constituents' are followed until a
non-symbol definition (or nil) is reached, which is returned.
See also `rx-constituents'."
  ;; `(? X)' and `(?? X)' are read as forms headed by a character, so
  ;; translate those characters to the symbols they stand for.
  (let ((key (cond ((eq op ? ) '\?)
                   ((eq op ??) '\??)
                   (t op))))
    ;; Chase aliases such as `in' -> `any' down to the real definition.
    (while (and key (symbolp key))
      (setq key (cdr (assq key rx-constituents))))
    key))
|
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
251 |
| 39516 | 252 |
(defun rx-check (form)
  "Check FORM according to its car's parsing info.
Signal an error if FORM has too few or too many arguments, or if
one of its arguments does not satisfy the predicate registered for
the car of FORM in `rx-constituents'."
  (let* ((head (car form))
         (info (rx-info head))
         (count (1- (length form)))
         (lower (nth 1 info))
         (upper (nth 2 info))
         (pred (nth 3 info)))
    (if (and lower (< count lower))
        (error "rx form `%s' requires at least %d args"
               head lower))
    (if (and upper (> count upper))
        (error "rx form `%s' accepts at most %d args"
               head upper))
    (if pred
        (dolist (candidate (cdr form))
          (or (funcall pred candidate)
              (error "rx form `%s' requires args satisfying `%s'"
                     head pred))))))
| 273 | |
| 274 | |
(defun rx-and (form)
  "Parse and produce code from FORM.
FORM is of the form `(and FORM1 ...)'.
The translated sub-forms are concatenated and enclosed in a shy group."
  (rx-check form)
  (let ((pieces (mapcar (lambda (sub) (rx-to-string sub 'no-group))
                        (cdr form))))
    (concat "\\(?:" (apply 'concat pieces) "\\)")))
| 39516 | 284 |
| 285 | |
(defun rx-or (form)
  "Parse and produce code from FORM, which is `(or FORM1 ...)'.
When every alternative is a string, the alternation is built with
`regexp-opt'; otherwise each alternative is translated separately
and the results are joined with \"\\\\|\".  The result is always
enclosed in a shy group."
  (rx-check form)
  (let* ((alternatives (cdr form))
         (literal-only (not (memq nil (mapcar 'stringp alternatives))))
         (inner (if literal-only
                    (regexp-opt alternatives)
                  (mapconcat #'rx-to-string alternatives "\\|"))))
    (concat "\\(?:" inner "\\)")))
| 39516 | 298 |
| 299 | |
(defun rx-quote-for-set (string)
  "Transform STRING for use in a character set.
If STRING contains a `]', move it to the front.
If STRING starts with a `^', move it to the end.
Return the transformed string."
  ;; Group 1 is the text before the `]', group 2 ALL the text after it.
  ;; The repetition must sit inside group 2; with the `*' outside the
  ;; group only the last character after the `]' would be retained.
  (when (string-match "\\`\\(\\(?:.\\|\n\\)+\\)\\]\\(\\(?:.\\|\n\\)*\\)\\'"
                      string)
    (setq string (concat "]" (match-string 1 string)
                         (match-string 2 string))))
  ;; A leading `^' would negate the set, so push it to the end.
  (when (string-match "\\`^\\(\\(?:.\\|\n\\)+\\)\\'" string)
    (setq string (concat (substring string 1) "^")))
  string)
| 311 | |
| 312 | |
(defun rx-check-any (arg)
  "Check arg ARG for Rx `any'.
Return t if ARG is a character or a non-empty string; signal an
error otherwise."
  (if (integerp arg)
      t
    (unless (stringp arg)
      (error "rx `any' requires string or character arg"))
    (when (zerop (length arg))
      (error "String arg for rx `any' must not be empty"))
    t))
| 39516 | 321 |
| 322 | |
(defun rx-any (form)
  "Parse and produce code from FORM, which is `(any SET)'.
SET is a character or a non-empty string.  A SET of several
characters becomes a bracketed character set.  A single ordinary
character is returned as is.  A single character that is special
in regexps (such as `.' or `*') is returned as a one-element
character set, so that it matches only itself."
  (rx-check form)
  (let* ((arg (cadr form))
         (set (if (integerp arg) (char-to-string arg) arg)))
    (cond ((/= (length set) 1)
           (concat "[" (rx-quote-for-set set) "]"))
          ;; `^' is left alone: "[^]" is not a valid set, and `rx-not'
          ;; builds the correct "[^^]" from the bare character.
          ((or (string= set "^")
               (string= set (regexp-quote set)))
           set)
          (t
           ;; Returning e.g. "." unquoted would match any character.
           (concat "[" set "]")))))
| 335 | |
| 336 | |
|
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
(defun rx-check-not (arg)
  "Check arg ARG for Rx `not'.
ARG must be a character-class symbol, the symbol `word-boundary',
or a list whose car is one of `not', `any', `in', `syntax' or
`category'.  Return t if ARG is acceptable; signal an error
otherwise."
  ;; Test ARG itself.  Referring to a variable named `form' here would
  ;; pick up the caller's dynamically bound FORM, i.e. the whole
  ;; `(not ...)' list, and the check would always succeed.
  (unless (or (memq arg
                    '(digit control hex-digit blank graphic printing
                      alphanumeric letter ascii nonascii lower
                      punctuation space upper word
                      ;; `rx-not' knows how to flip "\\b" into "\\B".
                      word-boundary))
              (and (consp arg)
                   (memq (car arg) '(not any in syntax category))))
    (error "rx `not' syntax error: %s" arg))
  t)
| 347 | |
| 348 | |
(defun rx-not (form)
  "Parse and produce code from FORM.  FORM is `(not ...)'.
The argument is translated first, and the resulting regexp string
is then negated: a character set gains or loses its leading `^',
and the backslash constructs s/S, c/C and b/B are swapped.  Any
other result is wrapped in a negated character set."
  (rx-check form)
  ;; `let*' evaluates in order, so the argument is translated before
  ;; `case-fold-search' is bound; the binding only matters for the
  ;; case-sensitive prefix tests below.
  (let* ((inner (rx-to-string (cadr form) 'no-group))
         (case-fold-search nil))
    (cond ((string-match "\\`\\[^" inner)
           ;; Already negated: drop the `^' (and the brackets too when
           ;; the set holds a single character).
           (if (= (length inner) 4)
               (substring inner 2 3)
             (concat "[" (substring inner 2))))
          ((string-match "\\`\\[" inner)
           (concat "[^" (substring inner 1)))
          (t
           ;; Ordered table of (PREFIX-REGEXP . REPLACEMENT-PREFIX);
           ;; the first matching entry wins.
           (let ((swaps '(("\\`\\\\s." . "\\S")
                          ("\\`\\\\S." . "\\s")
                          ("\\`\\\\c." . "\\C")
                          ("\\`\\\\C." . "\\c")
                          ("\\`\\\\B" . "\\b")
                          ("\\`\\\\b" . "\\B")))
                 (answer nil))
             (while (and swaps (null answer))
               (when (string-match (car (car swaps)) inner)
                 (setq answer (concat (cdr (car swaps))
                                      (substring inner 2))))
               (setq swaps (cdr swaps)))
             (or answer
                 (concat "[^" inner "]")))))))
| 374 | |
| 375 | |
(defun rx-repeat (form)
  "Parse and produce code from FORM.
FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'.
In the first case N must be a positive integer.  In the second
case N and M must be non-negative integers with N <= M.
FORM1 is enclosed in a shy group when its translation is not
atomic, so that the repetition applies to all of it."
  (rx-check form)
  (let ((exact (= (length form) 3))
        (low (nth 1 form))
        (high (nth 2 form)))          ; only meaningful in the N M case
    (cond ((and exact
                (not (and (integerp low) (> low 0))))
           (error "rx `repeat' requires positive integer first arg"))
          ((and (not exact)
                (or (not (integerp high))
                    (< high 0)
                    (not (integerp low))
                    (< low 0)
                    (< high low)))
           (error "rx `repeat' range error")))
    (let ((operand (rx-to-string (nth (if exact 2 3) form))))
      ;; Without grouping, a multi-character operand such as "ab" or
      ;; `anything' would have the interval apply to its tail only.
      (unless (rx-atomic-p operand)
        (setq operand (concat "\\(?:" operand "\\)")))
      (if exact
          (format "%s\\{%d\\}" operand low)
        (format "%s\\{%d,%d\\}" operand low high)))))
| 394 | |
| 395 | |
(defun rx-submatch (form)
  "Parse and produce code from FORM, which is `(submatch ...)'.
The translated sub-forms are concatenated and enclosed in a
capturing group."
  (let ((body (apply 'concat
                     (mapcar (lambda (sub) (rx-to-string sub 'no-group))
                             (cdr form)))))
    (concat "\\(" body "\\)")))
| 39516 | 402 |
|
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
(defun rx-backref (form)
  "Parse and produce code from FORM, which is `(backref N)'.
Return a backslash followed by N."
  (rx-check form)
  (let ((group-number (nth 1 form)))
    (format "\\%d" group-number)))
|
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
407 |
|
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
(defun rx-check-backref (arg)
  "Check arg ARG for Rx `backref'.
Return t if ARG is an integer between 1 and 9 inclusive; signal
an error otherwise."
  (if (and (integerp arg)
           (<= 1 arg)
           (<= arg 9))
      t
    (error "rx `backref' requires numeric 1<=arg<=9: %s" arg)))
|
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
412 |
(defun rx-kleene (form)
  "Parse and produce code from FORM.
FORM is `(OP FORM1)', where OP is one of the `zero-or-one',
`zero-or-more' etc. operators.
If OP is one of `*', `+', `?', produce a greedy regexp.
If OP is one of `*?', `+?', `??', produce a non-greedy regexp.
If OP is anything else, produce a greedy regexp if `rx-greedy-flag'
is non-nil."
  (rx-check form)
  (let* ((head (car form))
         ;; The repetition operator proper.
         (operator (cond ((memq head '(* *? 0+ zero-or-more)) "*")
                         ((memq head '(+ +? 1+ one-or-more)) "+")
                         (t "?")))
         ;; A trailing "?" makes the operator non-greedy.  The lists
         ;; below contain the characters `? ' and `??', which is how
         ;; the reader sees the heads of `(? X)' and `(?? X)'.
         (laziness (cond ((memq head '(* + ? )) "")
                         ((memq head '(*? +? ??)) "?")
                         (rx-greedy-flag "")
                         (t "?")))
         (body (rx-to-string (cadr form) 'no-group)))
    ;; Group the operand unless the operator already applies to all of it.
    (concat (if (rx-atomic-p body)
                body
              (concat "\\(?:" body "\\)"))
            operator
            laziness)))
|
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
433 |
|
05f00479612c
(rx-and): Generate a shy group.
Richard M. Stallman <rms@gnu.org>
parents:
47257
diff
changeset
|
(defun rx-atomic-p (r)
  "Return non-nil if regexp string R is atomic.
An atomic regexp R is one such that a suffix operator
appended to R will apply to all of R.  For example, \"a\"
\"[abc]\" and \"\\(ab\\|ab*c\\)\" are atomic and \"ab\",
\"[ab]c\", and \"ab\\|ab*c\" are not atomic.

This function may return false negatives, but it will not
return false positives.  It is nevertheless useful in
situations where an efficiency shortcut can be taken iff a
regexp is atomic.  The function can be improved to detect
more cases of atomic regexps.  Presently, this function
detects the following categories of atomic regexp;

  a group or shy group:  \\(...\\)
  a character class:     [...]
  a single character:    a

On the other hand, false negatives will be returned for
regexps that are atomic but end in operators, such as
\"a+\".  I think these are rare.  Probably such cases could
be detected without much effort.  A guarantee of no false
negatives would require a theoretic specification of the set
of all atomic regexps."
  (let ((len (length r)))
    (cond
     ;; A single character.
     ((equal len 1) t)
     ;; Opens with "\\(" and closes with "\\)".
     ((and (>= len 6)
           (eq (aref r 0) ?\\)
           (eq (aref r 1) ?\()
           (eq (aref r (- len 2)) ?\\)
           (eq (aref r (- len 1)) ?\)))
      t)
     ;; Opens with "[" and closes with "]".
     ((and (>= len 2)
           (eq (aref r 0) ?\[)
           (eq (aref r (- len 1)) ?\]))
      t)
     (t nil))))
| 39516 | 466 |
| 467 | |
(defun rx-syntax (form)
  "Parse and produce code from FORM, which is `(syntax SYMBOL)'.
SYMBOL is looked up in the alist `rx-syntax'; an error is
signaled if it is not found there."
  (rx-check form)
  (let* ((name (cadr form))
         (entry (assq name rx-syntax)))
    (if entry
        (format "\\s%c" (cdr entry))
      (error "Unknown rx syntax `%s'" name))))
| 475 | |
| 476 | |
(defun rx-check-category (form)
  "Check the argument FORM of a `(category FORM)'.
Return t if FORM is an integer or a symbol with a non-nil entry
in `rx-categories'; signal an error otherwise."
  (if (or (integerp form)
          (cdr (assq form rx-categories)))
      t
    (error "Unknown category `%s'" form)))
|
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
483 |
| 39516 | 484 |
(defun rx-category (form)
  "Parse and produce code from FORM, which is `(category SYMBOL ...)'.
An integer argument is used directly as the category character;
a symbol is looked up in `rx-categories'."
  (rx-check form)
  (let* ((spec (cadr form))
         (code (if (integerp spec)
                   spec
                 (cdr (assq spec rx-categories)))))
    (format "\\c%c" code)))
| 492 | |
| 493 | |
(defun rx-eval (form)
  "Parse and produce code from FORM, which is `(eval FORM)'.
The argument is evaluated with `eval' and its value is then
translated as an rx form."
  (rx-check form)
  (let ((expansion (eval (cadr form))))
    (rx-to-string expansion)))
| 498 | |
| 499 | |
(defun rx-greedy (form)
  "Parse and produce code from FORM.
If FORM is `(minimal-match FORM1)', non-greedy versions of `*',
`+', and `?' operators will be used in FORM1.  If FORM is
`(maximal-match FORM1)', greedy operators will be used."
  (rx-check form)
  ;; Rebind the flag around the translation of FORM1.
  (let ((rx-greedy-flag (if (eq (car form) 'maximal-match) t nil)))
    (rx-to-string (cadr form) nil)))
| 508 | |
| 509 | |
(defun rx-regexp (form)
  "Parse and produce code from FORM, which is `(regexp STRING)'.
STRING is inserted unchanged, enclosed in a shy group."
  (rx-check form)
  (let ((raw (cadr form)))
    (concat "\\(?:" raw "\\)")))
| 514 | |
| 515 | |
| 516 ;;;###autoload | |
(defun rx-to-string (form &optional no-group)
  "Parse and produce code for regular expression FORM.
FORM is a regular expression in sexp form.
NO-GROUP non-nil means don't put shy groups around the result.

Strings and characters are quoted literally.  Symbols and lists
are translated according to `rx-constituents'; an error is
signaled for anything that is not known there."
  (cond
   ((stringp form)
    (regexp-quote form))
   ((integerp form)
    (regexp-quote (char-to-string form)))
   ((symbolp form)
    (let ((definition (rx-info form)))
      (if (stringp definition)
          definition
        (if (null definition)
            (error "Unknown rx form `%s'" form)
          (funcall (car definition) form)))))
   ((consp form)
    (let* ((head (car form))
           (definition (rx-info head)))
      (or (consp definition)
          (error "Unknown rx form `%s'" head))
      (let ((regexp (funcall (car definition) form)))
        ;; Add a shy group unless the caller declined it or the result
        ;; already starts with a group.
        (if (and (not no-group)
                 (not (string-match "\\`\\\\[(]" regexp)))
            (concat "\\(?:" regexp "\\)")
          regexp))))
   (t
    (error "rx syntax error at `%s'" form))))
| 39516 | 543 |
| 544 | |
| 545 ;;;###autoload | |
| 546 (defmacro rx (regexp) | |
| 547 "Translate a regular expression REGEXP in sexp form to a regexp string. | |
| 548 See also `rx-to-string' for how to do such a translation at run-time. | |
| 549 | |
| 550 The following are valid subforms of regular expressions in sexp | |
| 551 notation. | |
| 552 | |
| 553 STRING | |
| 554 matches string STRING literally. | |
| 555 | |
| 556 CHAR | |
| 557 matches character CHAR literally. | |
| 558 | |
| 559 `not-newline' | |
| 560 matches any character except a newline. | |
| 561 | |
| 562 `anything' | |
| 563 matches any character | |
| 564 | |
| 565 `(any SET)' | |
| 566 matches any character in SET. SET may be a character or string. | |
| 567 Ranges of characters can be specified as `A-Z' in strings. | |
| 568 | |
|
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
569 `(in SET)' |
| 39516 | 570 like `any'. |
| 571 | |
| 572 `(not (any SET))' | |
| 573 matches any character not in SET | |
| 574 | |
| 575 `line-start' | |
| 576 matches the empty string, but only at the beginning of a line | |
| 577 in the text being matched | |
| 578 | |
| 579 `line-end' | |
| 580 is similar to `line-start' but matches only at the end of a line | |
| 581 | |
| 582 `string-start' | |
| 583 matches the empty string, but only at the beginning of the | |
| 584 string being matched against. | |
| 585 | |
| 586 `string-end' | |
| 587 matches the empty string, but only at the end of the | |
| 588 string being matched against. | |
| 589 | |
| 590 `buffer-start' | |
| 591 matches the empty string, but only at the beginning of the | |
| 592 buffer being matched against. | |
| 593 | |
| 594 `buffer-end' | |
| 595 matches the empty string, but only at the end of the | |
| 596 buffer being matched against. | |
| 597 | |
| 598 `point' | |
| 599 matches the empty string, but only at point. | |
| 600 | |
| 601 `word-start' | |
| 602 matches the empty string, but only at the beginning of a | |
| 603 word. | |
| 604 | |
| 605 `word-end' | |
| 606 matches the empty string, but only at the end of a word. | |
| 607 | |
| 608 `word-boundary' | |
| 609 matches the empty string, but only at the beginning or end of a | |
| 610 word. | |
| 611 | |
| 612 `(not word-boundary)' | |
| 613 matches the empty string, but not at the beginning or end of a | |
| 614 word. | |
| 615 | |
| 616 `digit' | |
| 617 matches 0 through 9. | |
| 618 | |
| 619 `control' | |
| 620 matches ASCII control characters. | |
| 621 | |
| 622 `hex-digit' | |
| 623 matches 0 through 9, a through f and A through F. | |
| 624 | |
| 625 `blank' | |
| 626 matches space and tab only. | |
| 627 | |
| 628 `graphic' | |
| 629 matches graphic characters--everything except ASCII control chars, | |
| 630 space, and DEL. | |
| 631 | |
| 632 `printing' | |
| 633 matches printing characters--everything except ASCII control chars | |
| 634 and DEL. | |
| 635 | |
| 636 `alphanumeric' | |
| 637 matches letters and digits. (But at present, for multibyte characters, | |
| 638 it matches anything that has word syntax.) | |
| 639 | |
| 640 `letter' | |
| 641 matches letters. (But at present, for multibyte characters, | |
| 642 it matches anything that has word syntax.) | |
| 643 | |
| 644 `ascii' | |
| 645 matches ASCII (unibyte) characters. | |
| 646 | |
| 647 `nonascii' | |
| 648 matches non-ASCII (multibyte) characters. | |
| 649 | |
| 650 `lower' | |
| 651 matches anything lower-case. | |
| 652 | |
| 653 `upper' | |
| 654 matches anything upper-case. | |
| 655 | |
| 656 `punctuation' | |
| 657 matches punctuation. (But at present, for multibyte characters, | |
| 658 it matches anything that has non-word syntax.) | |
| 659 | |
| 660 `space' | |
| 661 matches anything that has whitespace syntax. | |
| 662 | |
| 663 `word' | |
| 664 matches anything that has word syntax. | |
| 665 | |
| 666 `(syntax SYNTAX)' | |
| 667 matches a character with syntax SYNTAX. SYNTAX must be one | |
| 668 of the following symbols. | |
| 669 | |
| 670 `whitespace' (\\s- in string notation) | |
| 671 `punctuation' (\\s.) | |
| 672 `word' (\\sw) | |
| 673 `symbol' (\\s_) | |
| 674 `open-parenthesis' (\\s() | |
| 675 `close-parenthesis' (\\s)) | |
| 676 `expression-prefix' (\\s') | |
| 677 `string-quote' (\\s\") | |
| 678 `paired-delimiter' (\\s$) | |
| 679 `escape' (\\s\\) | |
| 680 `character-quote' (\\s/) | |
| 681 `comment-start' (\\s<) | |
| 682 `comment-end' (\\s>) | |
|
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
683 `string-delimiter' (\\s|) |
|
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
684 `comment-delimiter' (\\s!) |
| 39516 | 685 |
| 686 `(not (syntax SYNTAX))' | |
| 687 matches a character that does not have syntax SYNTAX. | |
| 688 | |
| 689 `(category CATEGORY)' | |
| 690 matches a character with category CATEGORY. CATEGORY must be | |
| 691 either a character to use for C, or one of the following symbols. | |
| 692 | |
| 693 `consonant' (\\c0 in string notation) | |
| 694 `base-vowel' (\\c1) | |
| 695 `upper-diacritical-mark' (\\c2) | |
| 696 `lower-diacritical-mark' (\\c3) | |
| 697 `tone-mark' (\\c4) | |
| 698 `symbol' (\\c5) | |
| 699 `digit' (\\c6) | |
| 700 `vowel-modifying-diacritical-mark' (\\c7) | |
| 701 `vowel-sign' (\\c8) | |
| 702 `semivowel-lower' (\\c9) | |
| 703 `not-at-end-of-line' (\\c<) | |
| 704 `not-at-beginning-of-line' (\\c>) | |
| 705 `alpha-numeric-two-byte' (\\cA) | |
| 706 `chinse-two-byte' (\\cC) | |
| 707 `greek-two-byte' (\\cG) | |
| 708 `japanese-hiragana-two-byte' (\\cH) | |
| 709 `indian-two-byte' (\\cI) | |
| 710 `japanese-katakana-two-byte' (\\cK) | |
| 711 `korean-hangul-two-byte' (\\cN) | |
| 712 `cyrillic-two-byte' (\\cY) | |
|
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
713 `combining-diacritic' (\\c^) |
| 39516 | 714 `ascii' (\\ca) |
| 715 `arabic' (\\cb) | |
| 716 `chinese' (\\cc) | |
| 717 `ethiopic' (\\ce) | |
| 718 `greek' (\\cg) | |
| 719 `korean' (\\ch) | |
| 720 `indian' (\\ci) | |
| 721 `japanese' (\\cj) | |
| 722 `japanese-katakana' (\\ck) | |
| 723 `latin' (\\cl) | |
| 724 `lao' (\\co) | |
| 725 `tibetan' (\\cq) | |
| 726 `japanese-roman' (\\cr) | |
| 727 `thai' (\\ct) | |
| 728 `vietnamese' (\\cv) | |
| 729 `hebrew' (\\cw) | |
| 730 `cyrillic' (\\cy) | |
| 731 `can-break' (\\c|) | |
| 732 | |
| 733 `(not (category CATEGORY))' | |
| 734 matches a character that does not have category CATEGORY. | |
| 735 | |
| 736 `(and SEXP1 SEXP2 ...)' | |
| 737 matches what SEXP1 matches, followed by what SEXP2 matches, etc. | |
| 738 | |
| 739 `(submatch SEXP1 SEXP2 ...)' | |
| 740 like `and', but makes the match accessible with `match-end', | |
| 741 `match-beginning', and `match-string'. | |
| 742 | |
| 743 `(group SEXP1 SEXP2 ...)' | |
| 744 another name for `submatch'. | |
| 745 | |
| 746 `(or SEXP1 SEXP2 ...)' | |
| 747 matches anything that matches SEXP1 or SEXP2, etc. If all | |
| 748 args are strings, use `regexp-opt' to optimize the resulting | |
| 749 regular expression. | |
| 750 | |
| 751 `(minimal-match SEXP)' | |
| 752 produce a non-greedy regexp for SEXP. Normally, regexps matching | |
|
53992
c5c237251824
(rx-check, rx-check-any, rx-check-not)
Eli Zaretskii <eliz@is.elta.co.il>
parents:
53974
diff
changeset
|
753 zero or more occurrences of something are \"greedy\" in that they |
| 39516 | 754 match as much as they can, as long as the overall regexp can |
| 755 still match. A non-greedy regexp matches as little as possible. | |
| 756 | |
| 757 `(maximal-match SEXP)' | |
| 47257 | 758 produce a greedy regexp for SEXP. This is the default. |
| 39516 | 759 |
| 760 `(zero-or-more SEXP)' | |
| 761 matches zero or more occurrences of what SEXP matches. | |
| 762 | |
| 763 `(0+ SEXP)' | |
| 764 like `zero-or-more'. | |
| 765 | |
| 766 `(* SEXP)' | |
| 767 like `zero-or-more', but always produces a greedy regexp. | |
| 768 | |
| 769 `(*? SEXP)' | |
| 770 like `zero-or-more', but always produces a non-greedy regexp. | |
| 771 | |
| 772 `(one-or-more SEXP)' | |
| 773 matches one or more occurrences of what SEXP matches. | |
|
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
774 |
| 39516 | 775 `(1+ SEXP)' |
| 776 like `one-or-more'. | |
| 777 | |
| 778 `(+ SEXP)' | |
| 779 like `one-or-more', but always produces a greedy regexp. | |
| 780 | |
| 781 `(+? SEXP)' | |
| 782 like `one-or-more', but always produces a non-greedy regexp. | |
| 783 | |
| 784 `(zero-or-one SEXP)' | |
| 785 matches zero or one occurrences of what SEXP matches. | |
|
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
48938
diff
changeset
|
786 |
| 39516 | 787 `(optional SEXP)' |
| 788 like `zero-or-one'. | |
| 789 | |
| 790 `(? SEXP)' | |
| 791 like `zero-or-one', but always produces a greedy regexp. | |
| 792 | |
| 793 `(?? SEXP)' | |
| 794 like `zero-or-one', but always produces a non-greedy regexp. | |
| 795 | |
| 796 `(repeat N SEXP)' | |
| 797 matches N occurrences of what SEXP matches. | |
| 798 | |
| 799 `(repeat N M SEXP)' | |
| 800 matches N to M occurrences of what SEXP matches. | |
| 801 | |
|
54461
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
802 `(backref N)' |
|
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
803 matches what was matched previously by submatch N. |
|
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
804 |
|
54601
d2d20534e329
(rx): Work at compile time, not run time.
Eli Zaretskii <eliz@gnu.org>
parents:
54503
diff
changeset
|
805 `(backref N)' |
|
d2d20534e329
(rx): Work at compile time, not run time.
Eli Zaretskii <eliz@gnu.org>
parents:
54503
diff
changeset
|
806 matches what was matched previously by submatch N. |
|
d2d20534e329
(rx): Work at compile time, not run time.
Eli Zaretskii <eliz@gnu.org>
parents:
54503
diff
changeset
|
807 |
| 39516 | 808 `(eval FORM)' |
|
54461
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
809 evaluate FORM and insert result. If result is a string, |
|
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
810 `regexp-quote' it. |
| 39516 | 811 |
| 812 `(regexp REGEXP)' | |
|
54461
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
813 include REGEXP in string notation in the result." |
| 39516 | 814 |
|
54461
5c8be4779a36
(rx): Work at compile time, not run time.
Juanma Barranquero <lekktu@gmail.com>
parents:
53992
diff
changeset
|
815 (rx-to-string regexp)) |
| 39516 | 816 |
| 817 (provide 'rx) | |
| 818 | |
| 52401 | 819 ;;; arch-tag: 12d01a63-0008-42bb-ab8c-1c7d63be370b |
| 39516 | 820 ;;; rx.el ends here |
