Mercurial > emacs
changeset 48938:05f00479612c
(rx-and): Generate a shy group.
Specify `no-group' when calling rx-to-string.
(rx-submatch): Specify `no-group' when calling rx-to-string.
(rx-kleene): Use rx-atomic-p to decide whether to make a group.
(rx-atomic-p): New function.
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Mon, 23 Dec 2002 17:43:24 +0000 |
parents | 3cabed8b65b7 |
children | f3fc48331bdc |
files | lisp/emacs-lisp/rx.el |
diffstat | 1 files changed, 51 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/lisp/emacs-lisp/rx.el Sun Dec 22 23:14:52 2002 +0000
+++ b/lisp/emacs-lisp/rx.el Mon Dec 23 17:43:24 2002 +0000
@@ -61,9 +61,9 @@
 ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*"
 ;; (rx (and line-start
 ;; "content-transfer-encoding:"
-;; (+ (? ?\n) blank)
+;; (+ (? ?\n)) blank
 ;; "quoted-printable"
-;; (+ (? ?\n) blank))
+;; (+ (? ?\n)) blank))
 ;;
 ;; (concat "^\\(?:" something-else "\\)")
 ;; (rx (and line-start (eval something-else))), statically or
@@ -78,11 +78,11 @@
 ;; (and line-start ?\n)))
 ;;
 ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) "
-;; (rx (and "$Id": "
+;; (rx (and "$Id: "
 ;; (1+ (not (in " ")))
 ;; " "
 ;; (submatch (1+ (not (in " "))))
-;; " ")))
+;; " "))
 ;;
 ;; "\\\\\\\\\\[\\w+"
 ;; (rx (and ?\\ ?\\ ?\[ (1+ word)))
@@ -272,7 +272,11 @@
   "Parse and produce code from FORM.
 FORM is of the form `(and FORM1 ...)'."
   (rx-check form)
-  (mapconcat #'rx-to-string (cdr form) nil))
+  (concat "\\(?:"
+          (mapconcat
+           (function (lambda (x) (rx-to-string x 'no-group)))
+           (cdr form) nil)
+          "\\)"))
 
 
 (defun rx-or (form)
@@ -384,8 +388,10 @@
 
 (defun rx-submatch (form)
   "Parse and produce code from FORM, which is `(submatch ...)'."
-  (concat "\\(" (mapconcat #'rx-to-string (cdr form) nil) "\\)"))
-
+  (concat "\\("
+          (mapconcat (function (lambda (x) (rx-to-string x 'no-group)))
+                     (cdr form) nil)
+          "\\)"))
 
 (defun rx-kleene (form)
   "Parse and produce code from FORM.
@@ -402,9 +408,44 @@
                      (t "?")))
         (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*")
                   ((memq (car form) '(+ +? 1+ one-or-more)) "+")
-                  (t "?"))))
-    (format "\\(?:%s\\)%s%s" (rx-to-string (cadr form) 'no-group)
-            op suffix)))
+                  (t "?")))
+        (result (rx-to-string (cadr form) 'no-group)))
+    (if (not (rx-atomic-p result))
+        (setq result (concat "\\(?:" result "\\)")))
+    (concat result op suffix)))
+
+(defun rx-atomic-p (r)
+  "Return non-nil if regexp string R is atomic.
+An atomic regexp R is one such that a suffix operator
+appended to R will apply to all of R. For example, \"a\"
+\"[abc]\" and \"\\(ab\\|ab*c\\)\" are atomic and \"ab\",
+\"[ab]c\", and \"ab\\|ab*c\" are not atomic.
+
+This function may return false negatives, but it will not
+return false positives. It is nevertheless useful in
+situations where an efficiency shortcut can be taken iff a
+regexp is atomic. The function can be improved to detect
+more cases of atomic regexps. Presently, this function
+detects the following categories of atomic regexp;
+
+  a group or shy group: \\(...\\)
+  a character class: [...]
+  a single character: a
+
+On the other hand, false negatives will be returned for
+regexps that are atomic but end in operators, such as
+\"a+\". I think these are rare. Probably such cases could
+be detected without much effort. A guarantee of no false
+negatives would require a theoretic specification of the set
+of all atomic regexps."
+  (let ((l (length r)))
+    (or (equal l 1)
+        (and (>= l 6)
+             (equal (substring r 0 2) "\\(")
+             (equal (substring r -2) "\\)"))
+        (and (>= l 2)
+             (equal (substring r 0 1) "[")
+             (equal (substring r -1) "]")))))
 
 
 (defun rx-syntax (form)
```