Mercurial > emacs
comparison lisp/emacs-lisp/rx.el @ 48938:05f00479612c
(rx-and): Generate a shy group.
Specify `no-group' when calling rx-to-string.
(rx-submatch): Specify `no-group' when calling rx-to-string.
(rx-kleene): Use rx-atomic-p to decide whether to make a group.
(rx-atomic-p): New function.
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Mon, 23 Dec 2002 17:43:24 +0000 |
parents | 14ef33c0a704 |
children | 0d8b17d428b5 |
comparison
equal
deleted
inserted
replaced
48937:3cabed8b65b7 | 48938:05f00479612c |
---|---|
59 ;; (submatch (or line-end (one-or-more (not (any ?:))))))) | 59 ;; (submatch (or line-end (one-or-more (not (any ?:))))))) |
60 ;; | 60 ;; |
61 ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" | 61 ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" |
62 ;; (rx (and line-start | 62 ;; (rx (and line-start |
63 ;; "content-transfer-encoding:" | 63 ;; "content-transfer-encoding:" |
64 ;; (+ (? ?\n) blank) | 64 ;; (+ (? ?\n)) blank |
65 ;; "quoted-printable" | 65 ;; "quoted-printable" |
66 ;; (+ (? ?\n) blank)) | 66 ;; (+ (? ?\n)) blank)) |
67 ;; | 67 ;; |
68 ;; (concat "^\\(?:" something-else "\\)") | 68 ;; (concat "^\\(?:" something-else "\\)") |
69 ;; (rx (and line-start (eval something-else))), statically or | 69 ;; (rx (and line-start (eval something-else))), statically or |
70 ;; (rx-to-string '(and line-start ,something-else)), dynamically. | 70 ;; (rx-to-string '(and line-start ,something-else)), dynamically. |
71 ;; | 71 ;; |
76 ;; "^;;\\s-*\n\\|^\n" | 76 ;; "^;;\\s-*\n\\|^\n" |
77 ;; (rx (or (and line-start ";;" (0+ space) ?\n) | 77 ;; (rx (or (and line-start ";;" (0+ space) ?\n) |
78 ;; (and line-start ?\n))) | 78 ;; (and line-start ?\n))) |
79 ;; | 79 ;; |
80 ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " | 80 ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " |
81 ;; (rx (and "$Id": " | 81 ;; (rx (and "$Id: " |
82 ;; (1+ (not (in " "))) | 82 ;; (1+ (not (in " "))) |
83 ;; " " | 83 ;; " " |
84 ;; (submatch (1+ (not (in " ")))) | 84 ;; (submatch (1+ (not (in " ")))) |
85 ;; " "))) | 85 ;; " ")) |
86 ;; | 86 ;; |
87 ;; "\\\\\\\\\\[\\w+" | 87 ;; "\\\\\\\\\\[\\w+" |
88 ;; (rx (and ?\\ ?\\ ?\[ (1+ word))) | 88 ;; (rx (and ?\\ ?\\ ?\[ (1+ word))) |
89 ;; | 89 ;; |
90 ;; etc. | 90 ;; etc. |
270 | 270 |
271 (defun rx-and (form) | 271 (defun rx-and (form) |
272 "Parse and produce code from FORM. | 272 "Parse and produce code from FORM. |
273 FORM is of the form `(and FORM1 ...)'." | 273 FORM is of the form `(and FORM1 ...)'." |
274 (rx-check form) | 274 (rx-check form) |
275 (mapconcat #'rx-to-string (cdr form) nil)) | 275 (concat "\\(?:" |
276 (mapconcat | |
277 (function (lambda (x) (rx-to-string x 'no-group))) | |
278 (cdr form) nil) | |
279 "\\)")) | |
276 | 280 |
277 | 281 |
278 (defun rx-or (form) | 282 (defun rx-or (form) |
279 "Parse and produce code from FORM, which is `(or FORM1 ...)'." | 283 "Parse and produce code from FORM, which is `(or FORM1 ...)'." |
280 (rx-check form) | 284 (rx-check form) |
382 (nth 1 form) (nth 2 form))))) | 386 (nth 1 form) (nth 2 form))))) |
383 | 387 |
384 | 388 |
385 (defun rx-submatch (form) | 389 (defun rx-submatch (form) |
386 "Parse and produce code from FORM, which is `(submatch ...)'." | 390 "Parse and produce code from FORM, which is `(submatch ...)'." |
387 (concat "\\(" (mapconcat #'rx-to-string (cdr form) nil) "\\)")) | 391 (concat "\\(" |
388 | 392 (mapconcat (function (lambda (x) (rx-to-string x 'no-group))) |
393 (cdr form) nil) | |
394 "\\)")) | |
389 | 395 |
390 (defun rx-kleene (form) | 396 (defun rx-kleene (form) |
391 "Parse and produce code from FORM. | 397 "Parse and produce code from FORM. |
392 FORM is `(OP FORM1)', where OP is one of the `zero-or-one', | 398 FORM is `(OP FORM1)', where OP is one of the `zero-or-one', |
393 `zero-or-more' etc. operators. | 399 `zero-or-more' etc. operators. |
400 ((memq (car form) '(*? +? ??)) "?") | 406 ((memq (car form) '(*? +? ??)) "?") |
401 (rx-greedy-flag "") | 407 (rx-greedy-flag "") |
402 (t "?"))) | 408 (t "?"))) |
403 (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") | 409 (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") |
404 ((memq (car form) '(+ +? 1+ one-or-more)) "+") | 410 ((memq (car form) '(+ +? 1+ one-or-more)) "+") |
405 (t "?")))) | 411 (t "?"))) |
406 (format "\\(?:%s\\)%s%s" (rx-to-string (cadr form) 'no-group) | 412 (result (rx-to-string (cadr form) 'no-group))) |
407 op suffix))) | 413 (if (not (rx-atomic-p result)) |
414 (setq result (concat "\\(?:" result "\\)"))) | |
415 (concat result op suffix))) | |
416 | |
417 (defun rx-atomic-p (r) | |
418 "Return non-nil if regexp string R is atomic. | |
419 An atomic regexp R is one such that a suffix operator | |
420 appended to R will apply to all of R. For example, \"a\", | |
421 \"[abc]\" and \"\\(ab\\|ab*c\\)\" are atomic and \"ab\", | |
422 \"[ab]c\", and \"ab\\|ab*c\" are not atomic. | |
423 | |
424 This function may return false negatives, and aims to avoid | |
425 false positives (but \"\\(a\\)\\|\\(b\\)\" and \"[a]b[c]\" still pass). It is nevertheless useful in | |
426 situations where an efficiency shortcut can be taken iff a | |
427 regexp is atomic. The function can be improved to detect | |
428 more cases of atomic regexps. Presently, this function | |
429 detects the following categories of atomic regexp: | |
430 | |
431 a group or shy group: \\(...\\) | |
432 a character class: [...] | |
433 a single character: a | |
434 | |
435 On the other hand, false negatives will be returned for | |
436 regexps that are atomic but end in operators, such as | |
437 \"a+\". I think these are rare. Probably such cases could | |
438 be detected without much effort. A guarantee of no false | |
439 negatives would require a theoretic specification of the set | |
440 of all atomic regexps." | |
441 (let ((l (length r))) | |
442 (or (equal l 1) | |
443 (and (>= l 6) | |
444 (equal (substring r 0 2) "\\(") | |
445 (equal (substring r -2) "\\)")) | |
446 (and (>= l 2) | |
447 (equal (substring r 0 1) "[") | |
448 (equal (substring r -1) "]"))))) | |
408 | 449 |
409 | 450 |
410 (defun rx-syntax (form) | 451 (defun rx-syntax (form) |
411 "Parse and produce code from FORM, which is `(syntax SYMBOL)'." | 452 "Parse and produce code from FORM, which is `(syntax SYMBOL)'." |
412 (rx-check form) | 453 (rx-check form) |