Mercurial > emacs
comparison lisp/emacs-lisp/sregex.el @ 29069:cb028b1d6345
Rewritten to take advantage of shy-groups and
intervals which makes it heaps simpler.
author | Stefan Monnier <monnier@iro.umontreal.ca> |
---|---|
date | Mon, 22 May 2000 04:28:02 +0000 |
parents | 6e0af7097929 |
children | f35b1d67aa8f |
comparison
equal
deleted
inserted
replaced
29068:61a23dd336ab | 29069:cb028b1d6345 |
---|---|
1 ;;; sregex.el --- symbolic regular expressions | 1 ;;; sregex.el --- symbolic regular expressions |
2 | 2 |
3 ;; Copyright (C) 1997, 1998 Free Software Foundation, Inc. | 3 ;; Copyright (C) 1997, 1998, 2000 Free Software Foundation, Inc. |
4 | 4 |
5 ;; Author: Bob Glickstein <bobg+sregex@zanshin.com> | 5 ;; Author: Bob Glickstein <bobg+sregex@zanshin.com> |
6 ;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com> | 6 ;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com> |
7 | 7 |
8 ;; This file is part of GNU Emacs. | 8 ;; This file is part of GNU Emacs. |
46 | 46 |
47 ;; It is also unnecessary to "group" parts of the expression together | 47 ;; It is also unnecessary to "group" parts of the expression together |
48 ;; to overcome operator precedence; that also happens automatically. | 48 ;; to overcome operator precedence; that also happens automatically. |
49 ;; For example: | 49 ;; For example: |
50 | 50 |
51 ;; (sregexq (opt (or "Bob" "Robert"))) => "\\(Bob\\|Robert\\)?" | 51 ;; (sregexq (opt (or "Bob" "Robert"))) => "\\(?:Bob\\|Robert\\)?" |
52 | 52 |
53 ;; It *is* possible to group parts of the expression in order to refer | 53 ;; It *is* possible to group parts of the expression in order to refer |
54 ;; to them with numbered backreferences: | 54 ;; to them with numbered backreferences: |
55 | 55 |
56 ;; (sregexq (group (or "Go" "Run")) | 56 ;; (sregexq (group (or "Go" "Run")) |
57 ;; ", Spot, " | 57 ;; ", Spot, " |
58 ;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1" | 58 ;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1" |
59 | |
60 ;; If `sregexq' needs to introduce its own grouping parentheses, it | |
61 ;; will automatically renumber your backreferences: | |
62 | |
63 ;; (sregexq (opt "resent-") | |
64 ;; (group (or "to" "cc" "bcc")) | |
65 ;; ": " | |
66 ;; (backref 1)) => "\\(resent-\\)?\\(to\\|cc\\|bcc\\): \\2" | |
67 | 59 |
68 ;; `sregexq' is a macro. Each time it is used, it constructs a simple | 60 ;; `sregexq' is a macro. Each time it is used, it constructs a simple |
69 ;; Lisp expression that then invokes a moderately complex engine to | 61 ;; Lisp expression that then invokes a moderately complex engine to |
70 ;; interpret the sregex and render the string form. Because of this, | 62 ;; interpret the sregex and render the string form. Because of this, |
71 ;; I don't recommend sprinkling calls to `sregexq' throughout your | 63 ;; I don't recommend sprinkling calls to `sregexq' throughout your |
97 ;; (let ((dotstar '(0+ any)) | 89 ;; (let ((dotstar '(0+ any)) |
98 ;; (whitespace '(1+ (syntax ?-))) | 90 ;; (whitespace '(1+ (syntax ?-))) |
99 ;; (digits '(1+ (char (?0 . ?9))))) | 91 ;; (digits '(1+ (char (?0 . ?9))))) |
100 ;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+" | 92 ;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+" |
101 | 93 |
102 ;; This package also provides sregex-specific versions of the Emacs | |
103 ;; functions `replace-match', `match-string', | |
104 ;; `match-string-no-properties', `match-beginning', `match-end', and | |
105 ;; `match-data'. In each case, the sregex version's name begins with | |
106 ;; `sregex-' and takes one additional optional parameter, an sregex | |
107 ;; "info" object. Each of these functions is concerned with numbered | |
108 ;; submatches. Since sregex may renumber submatches, alternate | |
109 ;; versions of these functions are needed that know how to adjust the | |
110 ;; supplied number. | |
111 | |
112 ;; The sregex info object for the most recently evaluated sregex can | |
113 ;; be obtained with `sregex-info'; so if you precompute your sregexes | |
114 ;; and you plan to use `replace-match' or one of the others with it, | |
115 ;; you need to record the info object for later use: | |
116 | |
117 ;; (let* ((regex (sregexq (opt "resent-") | |
118 ;; (group (or "to" "cc" "bcc")) | |
119 ;; ":")) | |
120 ;; (regex-info (sregex-info))) | |
121 ;; ... | |
122 ;; (if (re-search-forward regex ...) | |
123 ;; (let ((which (sregex-match-string 1 nil regex-info))) | |
124 ;; ...))) | |
125 | |
126 ;; In this example, `regex' is "\\(resent-\\)?\\(to\\|cc\\|bcc\\):", | |
127 ;; so the call to (sregex-match-string 1 ...) is automatically turned | |
128 ;; into a call to (match-string 2 ...). | |
129 | |
130 ;; If the sregex info argument to `sregex-replace-match', | |
131 ;; `sregex-match-string', `sregex-match-string-no-properties', | |
132 ;; `sregex-match-beginning', `sregex-match-end', or | |
133 ;; `sregex-match-data' is omitted, the current value of (sregex-info) | |
134 ;; is used. | |
135 | |
136 ;; You can do your own sregex submatch renumbering with | |
137 ;; `sregex-backref-num'. | |
138 | |
139 ;; Finally, `sregex-save-match-data' is like `save-match-data' but | |
140 ;; also saves and restores the information maintained by | |
141 ;; `sregex-info'. | |
142 | |
143 ;; To use this package in a Lisp program, simply (require 'sregex). | 94 ;; To use this package in a Lisp program, simply (require 'sregex). |
144 | 95 |
145 ;; Here are the clauses allowed in an `sregex' or `sregexq' | 96 ;; Here are the clauses allowed in an `sregex' or `sregexq' |
146 ;; expression: | 97 ;; expression: |
147 | 98 |
163 ;; - (group CLAUSE ...) | 114 ;; - (group CLAUSE ...) |
164 ;; Groups the given CLAUSEs using "\\(" and "\\)". | 115 ;; Groups the given CLAUSEs using "\\(" and "\\)". |
165 | 116 |
166 ;; - (sequence CLAUSE ...) | 117 ;; - (sequence CLAUSE ...) |
167 | 118 |
168 ;; Groups the given CLAUSEs; may or may not use "\\(" and "\\)". | 119 ;; Groups the given CLAUSEs; may or may not use "\\(?:" and "\\)". |
169 ;; Clauses groups by `sequence' do not count for purposes of | 120 ;; Clauses grouped by `sequence' do not count for purposes of |
170 ;; numbering backreferences. Use `sequence' in situations like | 121 ;; numbering backreferences. Use `sequence' in situations like |
171 ;; this: | 122 ;; this: |
172 | 123 |
173 ;; (sregexq (or "dog" "cat" | 124 ;; (sregexq (or "dog" "cat" |
174 ;; (sequence (opt "sea ") "monkey"))) | 125 ;; (sequence (opt "sea ") "monkey"))) |
175 ;; => "dog\\|cat\\|\\(sea \\)?monkey" | 126 ;; => "dog\\|cat\\|\\(?:sea \\)?monkey" |
176 | 127 |
177 ;; where a single `or' alternate needs to contain multiple | 128 ;; where a single `or' alternate needs to contain multiple |
178 ;; subclauses. | 129 ;; subclauses. |
179 | 130 |
180 ;; - (backref N) | 131 ;; - (backref N) |
181 ;; Matches the same string previously matched by the Nth "group" in | 132 ;; Matches the same string previously matched by the Nth "group" in |
182 ;; the same sregex. N is a positive integer. In the resulting | 133 ;; the same sregex. N is a positive integer. |
183 ;; regex, N may be adjusted to account for automatically introduced | |
184 ;; groups. | |
185 | 134 |
186 ;; - (or CLAUSE ...) | 135 ;; - (or CLAUSE ...) |
187 ;; Matches any one of the CLAUSEs by separating them with "\\|". | 136 ;; Matches any one of the CLAUSEs by separating them with "\\|". |
188 | 137 |
189 ;; - (0+ CLAUSE ...) | 138 ;; - (0+ CLAUSE ...) |
274 ;; Where MIN and MAX are characters, adds the range of characters | 223 ;; Where MIN and MAX are characters, adds the range of characters |
275 ;; from MIN through MAX to the set. | 224 ;; from MIN through MAX to the set. |
276 | 225 |
277 ;;; To do: | 226 ;;; To do: |
278 | 227 |
279 ;; Make (sregexq (or "a" (sequence "b" "c"))) return "a\\|bc" instead | |
280 ;; of "a\\|\\(bc\\)" | |
281 | |
282 ;; An earlier version of this package could optionally translate the | 228 ;; An earlier version of this package could optionally translate the |
283 ;; symbolic regex into other languages' syntaxes, e.g. Perl. For | 229 ;; symbolic regex into other languages' syntaxes, e.g. Perl. For |
284 ;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would | 230 ;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would |
285 ;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore | 231 ;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore |
286 ;; such a facility. | 232 ;; such a facility. |
287 | 233 |
234 ;; - handle multibyte chars in sregex--char-aux | |
235 ;; - add support for character classes ([:blank:], ...) | |
236 ;; - add support for non-greedy operators *? and +? | |
237 ;; - bug: (sregexq (opt (opt ?a))) returns "a??" which is a non-greedy "a?" | |
238 | |
288 ;;; Bugs: | 239 ;;; Bugs: |
289 | 240 |
290 ;; The (regex REGEX) form can confuse the code that distinguishes | |
291 ;; introduced groups from user-specified groups. Try to avoid using | |
292 ;; grouping within a `regex' form. Failing that, try to avoid using | |
293 ;; backrefs if you're using `regex'. | |
294 | |
295 ;;; Code: | 241 ;;; Code: |
296 | 242 |
297 (defsubst sregex--value-unitp (val) (nth 0 val)) | 243 (eval-when-compile (require 'cl)) |
298 (defsubst sregex--value-groups (val) (nth 1 val)) | 244 |
299 (defsubst sregex--value-tree (val) (nth 2 val)) | 245 ;; Compatibility code for when we didn't have shy-groups |
300 | 246 (defvar sregex--current-sregex nil) |
301 (defun sregex--make-value (unitp groups tree) | 247 (defun sregex-info () nil) |
302 (list unitp groups tree)) | 248 (defmacro sregex-save-match-data (&rest forms) (cons 'save-match-data forms)) |
303 | 249 (defun sregex-replace-match (r &optional f l str subexp x) |
304 (defvar sregex--current-sregex nil | 250 (replace-match r f l str subexp)) |
305 "Global state for `sregex-info'.") | 251 (defun sregex-match-string (c &optional i x) (match-string c i)) |
306 | |
307 (defun sregex-info () | |
308 "Return extra information about the latest call to `sregex'. | |
309 This extra information is needed in order to adjust user-requested | |
310 backreference numbers to numbers suitable for the generated regexp. | |
311 See e.g. `sregex-match-string' and `sregex-backref-num'." | |
312 sregex--current-sregex) | |
313 | |
314 ; (require 'advice) | |
315 ; (defadvice save-match-data (around sregex-save-match-data protect) | |
316 ; (let ((sregex--saved-sregex sregex--current-sregex)) | |
317 ; (unwind-protect | |
318 ; ad-do-it | |
319 ; (setq sregex--current-sregex sregex--saved-sregex)))) | |
320 (defmacro sregex-save-match-data (&rest forms) | |
321 "Like `save-match-data', but also saves and restores `sregex-info' data." | |
322 `(let ((sregex--saved-sregex sregex--current-sregex)) | |
323 (unwind-protect | |
324 (save-match-data ,@forms) | |
325 (setq sregex--current-sregex sregex--saved-sregex)))) | |
326 | |
327 (defun sregex-replace-match (replacement | |
328 &optional fixedcase literal string subexp sregex) | |
329 "Like `replace-match', for a regexp made with `sregex'. | |
330 This takes one additional optional argument, the `sregex' info, which | |
331 can be obtained with `sregex-info'. The SUBEXP argument is adjusted | |
332 to allow for \"introduced groups\". If the extra argument is omitted | |
333 or nil, it defaults to the current value of (sregex-info)." | |
334 (replace-match replacement fixedcase literal string | |
335 (and subexp | |
336 (sregex-backref-num subexp sregex)))) | |
337 | |
338 (defun sregex-match-string (count &optional in-string sregex) | |
339 "Like `match-string', for a regexp made with `sregex'. | |
340 This takes one additional optional argument, the `sregex' info, which | |
341 can be obtained with `sregex-info'. The COUNT argument is adjusted to | |
342 allow for \"introduced groups\". If the extra argument is omitted or | |
343 nil, it defaults to the current value of (sregex-info)." | |
344 (match-string (and count | |
345 (sregex-backref-num count sregex)) | |
346 in-string)) | |
347 | |
348 (defun sregex-match-string-no-properties (count &optional in-string sregex) | 252 (defun sregex-match-string-no-properties (count &optional in-string sregex) |
349 "Like `match-string-no-properties', for a regexp made with `sregex'. | 253 (match-string-no-properties count in-string)) |
350 This takes one additional optional argument, the `sregex' info, which | 254 (defun sregex-match-beginning (count &optional sregex) (match-beginning count)) |
351 can be obtained with `sregex-info'. The COUNT argument is adjusted to | 255 (defun sregex-match-end (count &optional sregex) (match-end count)) |
352 allow for \"introduced groups\". If the extra argument is omitted or | 256 (defun sregex-match-data (&optional sregex) (match-data)) |
353 nil, it defaults to the current value of (sregex-info)." | 257 (defun sregex-backref-num (n &optional sregex) n) |
354 (match-string-no-properties | 258 |
355 (and count | |
356 (sregex-backref-num count sregex)) | |
357 in-string)) | |
358 | |
359 (defun sregex-match-beginning (count &optional sregex) | |
360 "Like `match-beginning', for a regexp made with `sregex'. | |
361 This takes one additional optional argument, the `sregex' info, which | |
362 can be obtained with `sregex-info'. The COUNT argument is adjusted to | |
363 allow for \"introduced groups\". If the extra argument is omitted or | |
364 nil, it defaults to the current value of (sregex-info)." | |
365 (match-beginning (sregex-backref-num count sregex))) | |
366 | |
367 (defun sregex-match-end (count &optional sregex) | |
368 "Like `match-end', for a regexp made with `sregex'. | |
369 This takes one additional optional argument, the `sregex' info, which | |
370 can be obtained with `sregex-info'. The COUNT argument is adjusted to | |
371 allow for \"introduced groups\". If the extra argument is omitted or | |
372 nil, it defaults to the current value of (sregex-info)." | |
373 (match-end (sregex-backref-num count sregex))) | |
374 | |
375 (defun sregex-match-data (&optional sregex) | |
376 "Like `match-data', for a regexp made with `sregex'. | |
377 This takes one additional optional argument, the `sregex' info, which | |
378 can be obtained with `sregex-info'. \"Introduced groups\" are removed | |
379 from the result. If the extra argument is omitted or nil, it defaults | |
380 to the current value of (sregex-info)." | |
381 (let* ((data (match-data)) | |
382 (groups (sregex--value-groups (or sregex | |
383 sregex--current-sregex))) | |
384 (result (list (car (cdr data)) | |
385 (car data)))) | |
386 (setq data (cdr (cdr data))) | |
387 (while data | |
388 (if (car groups) | |
389 (setq result (append (list (car (cdr data)) | |
390 (car data)) | |
391 result))) | |
392 (setq groups (cdr groups) | |
393 data (cdr (cdr data)))) | |
394 (reverse result))) | |
395 | |
396 (defun sregex--render-tree (tree sregex) | |
397 (let ((key (car tree))) | |
398 (cond ((eq key 'str) | |
399 (cdr tree)) | |
400 ((eq key 'or) | |
401 (mapconcat '(lambda (x) | |
402 (sregex--render-tree x sregex)) | |
403 (cdr tree) | |
404 "\\|")) | |
405 ((eq key 'sequence) | |
406 (apply 'concat | |
407 (mapcar '(lambda (x) | |
408 (sregex--render-tree x sregex)) | |
409 (cdr tree)))) | |
410 ((eq key 'group) | |
411 (concat "\\(" | |
412 (sregex--render-tree (cdr tree) sregex) | |
413 "\\)")) | |
414 ((eq key 'opt) | |
415 (concat (sregex--render-tree (cdr tree) sregex) | |
416 "?")) | |
417 ((eq key '0+) | |
418 (concat (sregex--render-tree (cdr tree) sregex) | |
419 "*")) | |
420 ((eq key '1+) | |
421 (concat (sregex--render-tree (cdr tree) sregex) | |
422 "+")) | |
423 ((eq key 'backref) | |
424 (let ((num (sregex-backref-num (cdr tree) sregex))) | |
425 (if (> num 9) | |
426 (error "sregex: backref number %d too high after adjustment" | |
427 num) | |
428 (concat "\\" (int-to-string num))))) | |
429 (t (error "sregex internal error: unknown tree type %S" | |
430 key))))) | |
431 | 259 |
432 (defun sregex (&rest exps) | 260 (defun sregex (&rest exps) |
433 "Symbolic regular expression interpreter. | 261 "Symbolic regular expression interpreter. |
434 This is exactly like `sregexq' (q.v.) except that it evaluates all its | 262 This is exactly like `sregexq' (q.v.) except that it evaluates all its |
435 arguments, so literal sregex clauses must be quoted. For example: | 263 arguments, so literal sregex clauses must be quoted. For example: |
441 | 269 |
442 (let ((dotstar '(0+ any)) | 270 (let ((dotstar '(0+ any)) |
443 (whitespace '(1+ (syntax ?-))) | 271 (whitespace '(1+ (syntax ?-))) |
444 (digits '(1+ (char (?0 . ?9))))) | 272 (digits '(1+ (char (?0 . ?9))))) |
445 (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\"" | 273 (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\"" |
446 (progn | 274 (sregex--sequence exps nil)) |
447 (setq sregex--current-sregex (sregex--sequence exps nil)) | |
448 (sregex--render-tree (sregex--value-tree sregex--current-sregex) | |
449 sregex--current-sregex))) | |
450 | 275 |
451 (defmacro sregexq (&rest exps) | 276 (defmacro sregexq (&rest exps) |
452 "Symbolic regular expression interpreter. | 277 "Symbolic regular expression interpreter. |
453 This macro allows you to specify a regular expression (regexp) in | 278 This macro allows you to specify a regular expression (regexp) in |
454 symbolic form, and converts it into the string form required by Emacs's | 279 symbolic form, and converts it into the string form required by Emacs's |
544 Groups the given CLAUSEs using \"\\\\(\" and \"\\\\)\". | 369 Groups the given CLAUSEs using \"\\\\(\" and \"\\\\)\". |
545 | 370 |
546 - (sequence CLAUSE ...) | 371 - (sequence CLAUSE ...) |
547 | 372 |
548 Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\". | 373 Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\". |
549 Clauses groups by `sequence' do not count for purposes of | 374 Clauses grouped by `sequence' do not count for purposes of |
550 numbering backreferences. Use `sequence' in situations like | 375 numbering backreferences. Use `sequence' in situations like |
551 this: | 376 this: |
552 | 377 |
553 (sregexq (or \"dog\" \"cat\" | 378 (sregexq (or \"dog\" \"cat\" |
554 (sequence (opt \"sea \") \"monkey\"))) | 379 (sequence (opt \"sea \") \"monkey\"))) |
555 => \"dog\\\\|cat\\\\|\\\\(sea \\\\)?monkey\" | 380 => \"dog\\\\|cat\\\\|\\\\(?:sea \\\\)?monkey\" |
556 | 381 |
557 where a single `or' alternate needs to contain multiple | 382 where a single `or' alternate needs to contain multiple |
558 subclauses. | 383 subclauses. |
559 | 384 |
560 - (backref N) | 385 - (backref N) |
561 Matches the same string previously matched by the Nth \"group\" in | 386 Matches the same string previously matched by the Nth \"group\" in |
562 the same sregex. N is a positive integer. In the resulting | 387 the same sregex. N is a positive integer. |
563 regex, N may be adjusted to account for automatically introduced | |
564 groups. | |
565 | 388 |
566 - (or CLAUSE ...) | 389 - (or CLAUSE ...) |
567 Matches any one of the CLAUSEs by separating them with \"\\\\|\". | 390 Matches any one of the CLAUSEs by separating them with \"\\\\|\". |
568 | 391 |
569 - (0+ CLAUSE ...) | 392 - (0+ CLAUSE ...) |
637 | 460 |
638 - (regex REGEX) | 461 - (regex REGEX) |
639 This is a \"trapdoor\" for including ordinary regular expression | 462 This is a \"trapdoor\" for including ordinary regular expression |
640 strings in the result. Some regular expressions are clearer when | 463 strings in the result. Some regular expressions are clearer when |
641 written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for | 464 written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for |
642 instance. However, using this can confuse the code that | 465 instance. |
643 distinguishes introduced groups from user-specified groups. Avoid | |
644 using grouping within a `regex' form. Failing that, avoid using | |
645 backrefs if you're using `regex'. | |
646 | 466 |
647 Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...) | 467 Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...) |
648 has one of the following forms: | 468 has one of the following forms: |
649 | 469 |
650 - a character | 470 - a character |
657 Where MIN and MAX are characters, adds the range of characters | 477 Where MIN and MAX are characters, adds the range of characters |
658 from MIN through MAX to the set." | 478 from MIN through MAX to the set." |
659 `(apply 'sregex ',exps)) | 479 `(apply 'sregex ',exps)) |
660 | 480 |
661 (defun sregex--engine (exp combine) | 481 (defun sregex--engine (exp combine) |
662 (let* ((val (cond ((stringp exp) | 482 (cond |
663 (sregex--make-value (or (not (eq combine 'suffix)) | 483 ((stringp exp) |
664 (= (length exp) 1)) | 484 (if (and combine |
665 nil | 485 (eq combine 'suffix) |
666 (cons 'str | 486 (/= (length exp) 1)) |
667 (regexp-quote exp)))) | 487 (concat "\\(?:" (regexp-quote exp) "\\)") |
668 ((symbolp exp) | 488 (regexp-quote exp))) |
669 (funcall (intern (concat "sregex--" | 489 ((symbolp exp) |
670 (symbol-name exp))) | 490 (ecase exp |
671 combine)) | 491 (any ".") |
672 ((consp exp) | 492 (bol "^") |
673 (funcall (intern (concat "sregex--" | 493 (eol "$") |
674 (symbol-name (car exp)))) | 494 (wordchar "\\w") |
675 (cdr exp) | 495 (not-wordchar "\\W") |
676 combine)) | 496 (bot "\\`") |
677 (t (error "Invalid expression: %s" exp)))) | 497 (eot "\\'") |
678 (unitp (sregex--value-unitp val)) | 498 (point "\\=") |
679 (groups (sregex--value-groups val)) | 499 (word-boundary "\\b") |
680 (tree (sregex--value-tree val))) | 500 (not-word-boundary "\\B") |
681 (if (and combine (not unitp)) | 501 (bow "\\<") |
682 (sregex--make-value t | 502 (eow "\\>"))) |
683 (cons nil groups) | 503 ((consp exp) |
684 (cons 'group tree)) | 504 (funcall (intern (concat "sregex--" |
685 (sregex--make-value unitp groups tree)))) | 505 (symbol-name (car exp)))) |
506 (cdr exp) | |
507 combine)) | |
508 (t (error "Invalid expression: %s" exp)))) | |
686 | 509 |
687 (defun sregex--sequence (exps combine) | 510 (defun sregex--sequence (exps combine) |
688 (if (= (length exps) 1) | 511 (if (= (length exps) 1) (sregex--engine (car exps) combine) |
689 (sregex--engine (car exps) combine) | 512 (let ((re (mapconcat |
690 (let ((groups nil) | 513 (lambda (e) (sregex--engine e 'concat)) |
691 (trees nil)) ;grows in reverse | 514 exps ""))) |
692 (while exps | |
693 (let ((val (sregex--engine (car exps) 'concat))) | |
694 (setq groups (append groups | |
695 (sregex--value-groups val)) | |
696 trees (cons (sregex--value-tree val) trees) | |
697 exps (cdr exps)))) | |
698 (setq trees (nreverse trees)) | |
699 (if (eq combine 'suffix) | 515 (if (eq combine 'suffix) |
700 (sregex--make-value t | 516 (concat "\\(?:" re "\\)") |
701 (cons nil groups) | 517 re)))) |
702 (cons 'group | 518 |
703 (cons 'sequence trees))) | 519 (defun sregex--or (exps combine) |
704 (sregex--make-value (not (eq combine 'suffix)) | 520 (if (= (length exps) 1) (sregex--engine (car exps) combine) |
705 groups | 521 (let ((re (mapconcat |
706 (cons 'sequence trees)))))) | 522 (lambda (e) (sregex--engine e 'or)) |
707 | 523 exps "\\|"))) |
708 (defun sregex--group (exps combine) | 524 (if (not (eq combine 'or)) |
709 (let ((val (sregex--sequence exps nil))) | 525 (concat "\\(?:" re "\\)") |
710 (sregex--make-value t | 526 re)))) |
711 (cons t (sregex--value-groups val)) | 527 |
712 (cons 'group (sregex--value-tree val))))) | 528 (defun sregex--group (exps combine) (concat "\\(" (sregex--sequence exps nil) "\\)")) |
713 | 529 |
714 (defun sregex-backref-num (n &optional sregex) | 530 (defun sregex--backref (exps combine) (concat "\\" (int-to-string (car exps)))) |
715 "Adjust backreference number N according to SREGEX. | 531 (defun sregex--opt (exps combine) (concat (sregex--sequence exps 'suffix) "?")) |
716 When `sregex' introduces parenthesized groups that the user didn't ask | 532 (defun sregex--0+ (exps combine) (concat (sregex--sequence exps 'suffix) "*")) |
717 for, the numbering of the groups that the user *did* ask for gets all | 533 (defun sregex--1+ (exps combine) (concat (sregex--sequence exps 'suffix) "+")) |
718 out of whack. This function accounts for introduced groups. Example: | 534 |
719 | 535 (defun sregex--char (exps combine) (sregex--char-aux nil exps)) |
720 (sregexq (opt \"ab\") | 536 (defun sregex--not-char (exps combine) (sregex--char-aux t exps)) |
721 (group (or \"c\" \"d\"))) => \"\\\\(ab\\\\)?\\\\(c\\\\|d\\\\)\" | 537 |
722 (setq info (sregex-info)) | 538 (defun sregex--syntax (exps combine) (format "\\s%c" (car exps))) |
723 (sregex-backref-num 1 info) => 2 | 539 (defun sregex--not-syntax (exps combine) (format "\\S%c" (car exps))) |
724 | 540 |
725 The SREGEX parameter is optional and defaults to the current value of | 541 (defun sregex--regex (exps combine) |
726 `sregex-info'." | 542 (if combine (concat "\\(?:" (car exps) "\\)") (car exps))) |
727 (let ((groups (sregex--value-groups (or sregex | |
728 sregex--current-sregex))) | |
729 (result 0)) | |
730 (while (and groups (> n 0)) | |
731 (if (car groups) | |
732 (setq n (1- n))) | |
733 (setq result (1+ result) | |
734 groups (cdr groups))) | |
735 result)) | |
736 | |
737 (defun sregex--backref (exps combine) | |
738 (sregex--make-value t nil (cons 'backref (car exps)))) | |
739 | |
740 (defun sregex--any (combine) | |
741 (sregex--make-value t nil '(str . "."))) | |
742 | |
743 (defun sregex--opt (exps combine) | |
744 (let ((val (sregex--sequence exps 'suffix))) | |
745 (sregex--make-value t | |
746 (sregex--value-groups val) | |
747 (cons 'opt (sregex--value-tree val))))) | |
748 | |
749 (defun sregex--0+ (exps combine) | |
750 (let ((val (sregex--sequence exps 'suffix))) | |
751 (sregex--make-value t | |
752 (sregex--value-groups val) | |
753 (cons '0+ (sregex--value-tree val))))) | |
754 (defun sregex--1+ (exps combine) | |
755 (let ((val (sregex--sequence exps 'suffix))) | |
756 (sregex--make-value t | |
757 (sregex--value-groups val) | |
758 (cons '1+ (sregex--value-tree val))))) | |
759 | 543 |
760 (defun sregex--repeat (exps combine) | 544 (defun sregex--repeat (exps combine) |
761 (let ((min (or (car exps) 0)) | 545 (let* ((min (or (pop exps) 0)) |
762 (max (car (cdr exps)))) | 546 (minstr (number-to-string min)) |
763 (setq exps (cdr (cdr exps))) | 547 (max (pop exps))) |
764 (cond ((zerop min) | 548 (concat (sregex--sequence exps 'suffix) |
765 (cond ((equal max 0) ;degenerate | 549 (concat "\\{" minstr "," |
766 (sregex--make-value t nil nil)) | 550 (when max (number-to-string max)) "\\}")))) |
767 ((equal max 1) | 551 |
768 (sregex--opt exps combine)) | 552 (defun sregex--char-range (start end) |
769 ((not max) | 553 (let ((startc (char-to-string start)) |
770 (sregex--0+ exps combine)) | 554 (endc (char-to-string end))) |
771 (t (sregex--sequence (make-list max | 555 (cond |
772 (cons 'opt exps)) | 556 ((> end (+ start 2)) (concat startc "-" endc)) |
773 combine)))) | 557 ((> end (+ start 1)) (concat startc (char-to-string (1+ start)) endc)) |
774 ((= min 1) | 558 ((> end start) (concat startc endc)) |
775 (cond ((equal max 1) | 559 (t startc)))) |
776 (sregex--sequence exps combine)) | |
777 ((not max) | |
778 (sregex--1+ exps combine)) | |
779 (t (sregex--sequence (append exps | |
780 (make-list (1- max) | |
781 (cons 'opt exps))) | |
782 combine)))) | |
783 (t (sregex--sequence (append exps | |
784 (list (append (list 'repeat | |
785 (1- min) | |
786 (and max | |
787 (1- max))) | |
788 exps))) | |
789 combine))))) | |
790 | |
791 (defun sregex--or (exps combine) | |
792 (if (= (length exps) 1) | |
793 (sregex--engine (car exps) combine) | |
794 (let ((groups nil) | |
795 (trees nil)) | |
796 (while exps | |
797 (let ((val (sregex--engine (car exps) 'or))) | |
798 (setq groups (append groups | |
799 (sregex--value-groups val)) | |
800 trees (cons (sregex--value-tree val) trees) | |
801 exps (cdr exps)))) | |
802 (sregex--make-value (eq combine 'or) | |
803 groups | |
804 (cons 'or (nreverse trees)))))) | |
805 | |
806 (defmacro sregex--char-range-aux () | |
807 '(if start | |
808 (let (startc endc) | |
809 (if (and (<= 32 start) | |
810 (<= start 127)) | |
811 (setq startc (char-to-string start) | |
812 endc (char-to-string end)) | |
813 (setq startc (format "\\%03o" start) | |
814 endc (format "\\%03o" end))) | |
815 (if (> end start) | |
816 (if (> end (+ start 1)) | |
817 (setq class (concat class startc "-" endc)) | |
818 (setq class (concat class startc endc))) | |
819 (setq class (concat class startc)))))) | |
820 | |
821 (defmacro sregex--char-range (rstart rend) | |
822 `(let ((i ,rstart) | |
823 start end) | |
824 (while (<= i ,rend) | |
825 (if (aref chars i) | |
826 (progn | |
827 (if start | |
828 (setq end i) | |
829 (setq start i | |
830 end i)) | |
831 (aset chars i nil)) | |
832 (sregex--char-range-aux) | |
833 (setq start nil | |
834 end nil)) | |
835 (setq i (1+ i))) | |
836 (sregex--char-range-aux))) | |
837 | 560 |
838 (defun sregex--char-aux (complement args) | 561 (defun sregex--char-aux (complement args) |
839 (let ((chars (make-vector 256 nil))) | 562 ;; regex-opt does the same, we should join effort. |
840 (while args | 563 (let ((chars (make-bool-vector 256 nil))) ; Yeah, right! |
841 (let ((arg (car args))) | 564 (dolist (arg args) |
842 (cond ((integerp arg) | 565 (cond ((integerp arg) (aset chars arg t)) |
843 (aset chars arg t)) | 566 ((stringp arg) (mapcar (lambda (c) (aset chars c t)) arg)) |
844 ((stringp arg) | 567 ((consp arg) |
845 (mapcar (function | 568 (let ((start (car arg)) |
846 (lambda (c) | 569 (end (cdr arg))) |
847 (aset chars c t))) | 570 (when (> start end) |
848 arg)) | 571 (let ((tmp start)) (setq start end) (setq end tmp))) |
849 ((consp arg) | 572 ;; now start <= end |
850 (let ((start (car arg)) | 573 (let ((i start)) |
851 (end (cdr arg))) | 574 (while (<= i end) |
852 (if (> start end) | 575 (aset chars i t) |
853 (let ((tmp start)) | 576 (setq i (1+ i)))))))) |
854 (setq start end | |
855 end tmp))) | |
856 ;; now start <= end | |
857 (let ((i start)) | |
858 (while (<= i end) | |
859 (aset chars i t) | |
860 (setq i (1+ i)))))))) | |
861 (setq args (cdr args))) | |
862 ;; now chars is a map of the characters in the class | 577 ;; now chars is a map of the characters in the class |
863 (let ((class "") | 578 (let ((caret (aref chars ?^)) |
864 (caret (aref chars ?^))) | 579 (dash (aref chars ?-)) |
580 (class (if (aref chars ?\]) "]" ""))) | |
865 (aset chars ?^ nil) | 581 (aset chars ?^ nil) |
866 (if (aref chars ?\]) | 582 (aset chars ?- nil) |
867 (progn | 583 (aset chars ?\] nil) |
868 (setq class (concat class "]")) | 584 |
869 (aset chars ?\] nil))) | 585 (let (start end) |
870 (if (aref chars ?-) | 586 (dotimes (i 256) |
871 (progn | 587 (if (aref chars i) |
872 (setq class (concat class "-")) | 588 (progn |
873 (aset chars ?- nil))) | 589 (unless start (setq start i)) |
874 (if (aref chars ?\\) | 590 (setq end i) |
875 (progn | 591 (aset chars i nil)) |
876 (setq class (concat class "\\\\")) | 592 (when start |
877 (aset chars ?\\ nil))) | 593 (setq class (concat class (sregex--char-range start end))) |
878 | 594 (setq start nil)))) |
879 (sregex--char-range ?A ?Z) | 595 (if start |
880 (sregex--char-range ?a ?z) | 596 (setq class (concat class (sregex--char-range start end))))) |
881 (sregex--char-range ?0 ?9) | 597 |
882 | 598 (if (> (length class) 0) |
883 (let ((i 32)) | 599 (setq class (concat class (if caret "^") (if dash "-"))) |
884 (while (< i 128) | 600 (setq class (concat class (if dash "-") (if caret "^")))) |
885 (if (aref chars i) | 601 (if (and (not complement) (= (length class) 1)) |
886 (progn | 602 (regexp-quote class) |
887 (setq class (concat class (char-to-string i))) | 603 (concat "[" (if complement "^") class "]"))))) |
888 (aset chars i nil))) | |
889 (setq i (1+ i)))) | |
890 | |
891 (sregex--char-range 0 31) | |
892 (sregex--char-range 128 255) | |
893 | |
894 (let ((i 0)) | |
895 (while (< i 256) | |
896 (if (aref chars i) | |
897 (setq class (concat class (format "\\%03o" i)))) | |
898 (setq i (1+ i)))) | |
899 | |
900 (if caret | |
901 (setq class (concat class "^"))) | |
902 (concat "[" (if complement "^") class "]")))) | |
903 | |
904 (defun sregex--char (exps combine) | |
905 (sregex--make-value t nil (cons 'str (sregex--char-aux nil exps)))) | |
906 (defun sregex--not-char (exps combine) | |
907 (sregex--make-value t nil (cons 'str (sregex--char-aux t exps)))) | |
908 | |
909 (defun sregex--bol (combine) | |
910 (sregex--make-value t nil '(str . "^"))) | |
911 (defun sregex--eol (combine) | |
912 (sregex--make-value t nil '(str . "$"))) | |
913 | |
914 (defun sregex--wordchar (combine) | |
915 (sregex--make-value t nil '(str . "\\w"))) | |
916 (defun sregex--not-wordchar (combine) | |
917 (sregex--make-value t nil '(str . "\\W"))) | |
918 | |
919 (defun sregex--syntax (exps combine) | |
920 (sregex--make-value t nil (cons 'str (format "\\s%c" (car exps))))) | |
921 (defun sregex--not-syntax (exps combine) | |
922 (sregex--make-value t nil (cons 'str (format "\\S%c" (car exps))))) | |
923 | |
924 (defun sregex--bot (combine) | |
925 (sregex--make-value t nil (cons 'str "\\`"))) | |
926 (defun sregex--eot (combine) | |
927 (sregex--make-value t nil (cons 'str "\\'"))) | |
928 | |
929 (defun sregex--point (combine) | |
930 (sregex--make-value t nil '(str . "\\="))) | |
931 | |
932 (defun sregex--word-boundary (combine) | |
933 (sregex--make-value t nil '(str . "\\b"))) | |
934 (defun sregex--not-word-boundary (combine) | |
935 (sregex--make-value t nil '(str . "\\B"))) | |
936 | |
937 (defun sregex--bow (combine) | |
938 (sregex--make-value t nil '(str . "\\<"))) | |
939 (defun sregex--eow (combine) | |
940 (sregex--make-value t nil '(str . "\\>"))) | |
941 | |
942 | |
943 ;; trapdoor - usage discouraged | |
944 (defun sregex--regex (exps combine) | |
945 (sregex--make-value nil nil (car exps))) | |
946 | 604 |
947 (provide 'sregex) | 605 (provide 'sregex) |
948 | 606 |
949 ;;; sregex.el ends here | 607 ;;; sregex.el ends here |
950 | 608 |