comparison lisp/emacs-lisp/rx.el @ 48938:05f00479612c

(rx-and): Generate a shy group. Specify `no-group' when calling rx-to-string. (rx-submatch): Specify `no-group' when calling rx-to-string. (rx-kleene): Use rx-atomic-p to decide whether to make a group. (rx-atomic-p): New function.
author Richard M. Stallman <rms@gnu.org>
date Mon, 23 Dec 2002 17:43:24 +0000
parents 14ef33c0a704
children 0d8b17d428b5
comparison
equal deleted inserted replaced
48937:3cabed8b65b7 48938:05f00479612c
59 ;; (submatch (or line-end (one-or-more (not (any ?:))))))) 59 ;; (submatch (or line-end (one-or-more (not (any ?:)))))))
60 ;; 60 ;;
61 ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" 61 ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*"
62 ;; (rx (and line-start 62 ;; (rx (and line-start
63 ;; "content-transfer-encoding:" 63 ;; "content-transfer-encoding:"
64 ;; (+ (? ?\n) blank) 64 ;; (+ (? ?\n)) blank
65 ;; "quoted-printable" 65 ;; "quoted-printable"
66 ;; (+ (? ?\n) blank)) 66 ;; (+ (? ?\n)) blank))
67 ;; 67 ;;
68 ;; (concat "^\\(?:" something-else "\\)") 68 ;; (concat "^\\(?:" something-else "\\)")
69 ;; (rx (and line-start (eval something-else))), statically or 69 ;; (rx (and line-start (eval something-else))), statically or
70 ;; (rx-to-string '(and line-start ,something-else)), dynamically. 70 ;; (rx-to-string '(and line-start ,something-else)), dynamically.
71 ;; 71 ;;
76 ;; "^;;\\s-*\n\\|^\n" 76 ;; "^;;\\s-*\n\\|^\n"
77 ;; (rx (or (and line-start ";;" (0+ space) ?\n) 77 ;; (rx (or (and line-start ";;" (0+ space) ?\n)
78 ;; (and line-start ?\n))) 78 ;; (and line-start ?\n)))
79 ;; 79 ;;
80 ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " 80 ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) "
81 ;; (rx (and "$Id": " 81 ;; (rx (and "$Id: "
82 ;; (1+ (not (in " "))) 82 ;; (1+ (not (in " ")))
83 ;; " " 83 ;; " "
84 ;; (submatch (1+ (not (in " ")))) 84 ;; (submatch (1+ (not (in " "))))
85 ;; " "))) 85 ;; " "))
86 ;; 86 ;;
87 ;; "\\\\\\\\\\[\\w+" 87 ;; "\\\\\\\\\\[\\w+"
88 ;; (rx (and ?\\ ?\\ ?\[ (1+ word))) 88 ;; (rx (and ?\\ ?\\ ?\[ (1+ word)))
89 ;; 89 ;;
90 ;; etc. 90 ;; etc.
270 270
271 (defun rx-and (form) 271 (defun rx-and (form)
272 "Parse and produce code from FORM. 272 "Parse and produce code from FORM.
273 FORM is of the form `(and FORM1 ...)'." 273 FORM is of the form `(and FORM1 ...)'."
274 (rx-check form) 274 (rx-check form)
275 (mapconcat #'rx-to-string (cdr form) nil)) 275 (concat "\\(?:"
276 (mapconcat
277 (function (lambda (x) (rx-to-string x 'no-group)))
278 (cdr form) nil)
279 "\\)"))
276 280
277 281
278 (defun rx-or (form) 282 (defun rx-or (form)
279 "Parse and produce code from FORM, which is `(or FORM1 ...)'." 283 "Parse and produce code from FORM, which is `(or FORM1 ...)'."
280 (rx-check form) 284 (rx-check form)
382 (nth 1 form) (nth 2 form))))) 386 (nth 1 form) (nth 2 form)))))
383 387
384 388
385 (defun rx-submatch (form) 389 (defun rx-submatch (form)
386 "Parse and produce code from FORM, which is `(submatch ...)'." 390 "Parse and produce code from FORM, which is `(submatch ...)'."
387 (concat "\\(" (mapconcat #'rx-to-string (cdr form) nil) "\\)")) 391 (concat "\\("
388 392 (mapconcat (function (lambda (x) (rx-to-string x 'no-group)))
393 (cdr form) nil)
394 "\\)"))
389 395
390 (defun rx-kleene (form) 396 (defun rx-kleene (form)
391 "Parse and produce code from FORM. 397 "Parse and produce code from FORM.
392 FORM is `(OP FORM1)', where OP is one of the `zero-or-one', 398 FORM is `(OP FORM1)', where OP is one of the `zero-or-one',
393 `zero-or-more' etc. operators. 399 `zero-or-more' etc. operators.
400 ((memq (car form) '(*? +? ??)) "?") 406 ((memq (car form) '(*? +? ??)) "?")
401 (rx-greedy-flag "") 407 (rx-greedy-flag "")
402 (t "?"))) 408 (t "?")))
403 (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") 409 (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*")
404 ((memq (car form) '(+ +? 1+ one-or-more)) "+") 410 ((memq (car form) '(+ +? 1+ one-or-more)) "+")
405 (t "?")))) 411 (t "?")))
406 (format "\\(?:%s\\)%s%s" (rx-to-string (cadr form) 'no-group) 412 (result (rx-to-string (cadr form) 'no-group)))
407 op suffix))) 413 (if (not (rx-atomic-p result))
414 (setq result (concat "\\(?:" result "\\)")))
415 (concat result op suffix)))
416
(defun rx--bracket-end (r start)
  "Return the index just past the bracket expression starting at START in R.
The character of R at START must be an opening square bracket.
Return nil if the bracket expression is never terminated."
  (let ((len (length r))
        (i (1+ start))
        (end nil))
    ;; A leading `^' only negates the set; it is never the terminator.
    (when (and (< i len) (eq (aref r i) ?^))
      (setq i (1+ i)))
    ;; A `]' in first position is a literal member of the set.
    (when (and (< i len) (eq (aref r i) ?\]))
      (setq i (1+ i)))
    (while (and (not end) (< i len))
      (cond
       ;; Skip a named class such as [:alpha:] as a unit, so that its
       ;; closing bracket is not mistaken for the end of the set.
       ((and (eq (aref r i) ?\[)
             (eq (string-match "\\[:[a-z]+:\\]" r i) i))
        (setq i (match-end 0)))
       ((eq (aref r i) ?\])
        (setq end (1+ i)))
       (t
        (setq i (1+ i)))))
    end))

(defun rx--group-end (r start)
  "Return the index just past the group that opens at START in R.
R must contain a backslash followed by an open parenthesis at START.
Nested groups, backslash escapes and bracket expressions are skipped
over.  Return nil if the group is never closed."
  (let ((len (length r))
        (i (+ start 2))
        (depth 1)
        (end nil))
    ;; `i' becomes nil when an unterminated bracket expression is met,
    ;; which aborts the scan with a nil result.
    (while (and (not end) i (< i len))
      (let ((c (aref r i)))
        (cond
         ;; Backslash construct: only group delimiters change the depth;
         ;; any other escaped character is skipped together with its
         ;; backslash.
         ((and (eq c ?\\) (< (1+ i) len))
          (let ((next (aref r (1+ i))))
            (cond ((eq next ?\() (setq depth (1+ depth)))
                  ((eq next ?\)) (setq depth (1- depth)))))
          (setq i (+ i 2))
          (when (zerop depth)
            (setq end i)))
         ;; Backslash is not special inside a bracket expression, so the
         ;; whole set has to be skipped as a unit.
         ((eq c ?\[)
          (setq i (rx--bracket-end r i)))
         (t
          (setq i (1+ i))))))
    end))

(defun rx-atomic-p (r)
  "Return non-nil if regexp string R is atomic.
An atomic regexp R is one such that a suffix operator
appended to R will apply to all of R.  For example, \"a\",
\"[abc]\" and \"\\(ab\\|ab*c\\)\" are atomic and \"ab\",
\"[ab]c\", and \"ab\\|ab*c\" are not atomic.

This function may return false negatives, but it will not
return false positives.  It is nevertheless useful in
situations where an efficiency shortcut can be taken iff a
regexp is atomic.  The function can be improved to detect
more cases of atomic regexps.  Presently, this function
detects the following categories of atomic regexp:

  a group or shy group:  \\(...\\)
  a character class:     [...]
  a single character:    a

For the first two categories the opening delimiter must be
closed by the very last character of R.  Strings that merely
begin and end with delimiters, such as \"\\(a\\)b\\(c\\)\" or
\"[a]b[c]\", are not atomic and yield nil.

On the other hand, false negatives will be returned for
regexps that are atomic but end in operators, such as
\"a+\", and for groups shorter than six characters.  A
guarantee of no false negatives would require a theoretic
specification of the set of all atomic regexps.

The match data is preserved."
  (save-match-data
    (let ((case-fold-search nil)        ; class names are lower case only
          (len (length r)))
      (cond
       ((= len 1) t)
       ;; The minimum length of 6 is kept from the previous
       ;; implementation so that regexps generated for already-accepted
       ;; inputs do not change; it can only cause false negatives.
       ((and (>= len 6)
             (eq (aref r 0) ?\\)
             (eq (aref r 1) ?\())
        (eq (rx--group-end r 0) len))
       ((and (>= len 2)
             (eq (aref r 0) ?\[))
        (eq (rx--bracket-end r 0) len))))))
408 449
409 450
410 (defun rx-syntax (form) 451 (defun rx-syntax (form)
411 "Parse and produce code from FORM, which is `(syntax SYMBOL)'." 452 "Parse and produce code from FORM, which is `(syntax SYMBOL)'."
412 (rx-check form) 453 (rx-check form)