comparison lisp/emacs-lisp/sregex.el @ 29069:cb028b1d6345

Rewritten to take advantage of shy-groups and intervals which makes it heaps simpler.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Mon, 22 May 2000 04:28:02 +0000
parents 6e0af7097929
children f35b1d67aa8f
comparison
equal deleted inserted replaced
29068:61a23dd336ab 29069:cb028b1d6345
1 ;;; sregex.el --- symbolic regular expressions 1 ;;; sregex.el --- symbolic regular expressions
2 2
3 ;; Copyright (C) 1997, 1998 Free Software Foundation, Inc. 3 ;; Copyright (C) 1997, 1998, 2000 Free Software Foundation, Inc.
4 4
5 ;; Author: Bob Glickstein <bobg+sregex@zanshin.com> 5 ;; Author: Bob Glickstein <bobg+sregex@zanshin.com>
6 ;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com> 6 ;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com>
7 7
8 ;; This file is part of GNU Emacs. 8 ;; This file is part of GNU Emacs.
46 46
47 ;; It is also unnecessary to "group" parts of the expression together 47 ;; It is also unnecessary to "group" parts of the expression together
48 ;; to overcome operator precedence; that also happens automatically. 48 ;; to overcome operator precedence; that also happens automatically.
49 ;; For example: 49 ;; For example:
50 50
51 ;; (sregexq (opt (or "Bob" "Robert"))) => "\\(Bob\\|Robert\\)?" 51 ;; (sregexq (opt (or "Bob" "Robert"))) => "\\(?:Bob\\|Robert\\)?"
52 52
53 ;; It *is* possible to group parts of the expression in order to refer 53 ;; It *is* possible to group parts of the expression in order to refer
54 ;; to them with numbered backreferences: 54 ;; to them with numbered backreferences:
55 55
56 ;; (sregexq (group (or "Go" "Run")) 56 ;; (sregexq (group (or "Go" "Run"))
57 ;; ", Spot, " 57 ;; ", Spot, "
58 ;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1" 58 ;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1"
59
60 ;; If `sregexq' needs to introduce its own grouping parentheses, it
61 ;; will automatically renumber your backreferences:
62
63 ;; (sregexq (opt "resent-")
64 ;; (group (or "to" "cc" "bcc"))
65 ;; ": "
66 ;; (backref 1)) => "\\(resent-\\)?\\(to\\|cc\\|bcc\\): \\2"
67 59
68 ;; `sregexq' is a macro. Each time it is used, it constructs a simple 60 ;; `sregexq' is a macro. Each time it is used, it constructs a simple
69 ;; Lisp expression that then invokes a moderately complex engine to 61 ;; Lisp expression that then invokes a moderately complex engine to
70 ;; interpret the sregex and render the string form. Because of this, 62 ;; interpret the sregex and render the string form. Because of this,
71 ;; I don't recommend sprinkling calls to `sregexq' throughout your 63 ;; I don't recommend sprinkling calls to `sregexq' throughout your
97 ;; (let ((dotstar '(0+ any)) 89 ;; (let ((dotstar '(0+ any))
98 ;; (whitespace '(1+ (syntax ?-))) 90 ;; (whitespace '(1+ (syntax ?-)))
99 ;; (digits '(1+ (char (?0 . ?9))))) 91 ;; (digits '(1+ (char (?0 . ?9)))))
100 ;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+" 92 ;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+"
101 93
102 ;; This package also provides sregex-specific versions of the Emacs
103 ;; functions `replace-match', `match-string',
104 ;; `match-string-no-properties', `match-beginning', `match-end', and
105 ;; `match-data'. In each case, the sregex version's name begins with
106 ;; `sregex-' and takes one additional optional parameter, an sregex
107 ;; "info" object. Each of these functions is concerned with numbered
108 ;; submatches. Since sregex may renumber submatches, alternate
109 ;; versions of these functions are needed that know how to adjust the
110 ;; supplied number.
111
112 ;; The sregex info object for the most recently evaluated sregex can
113 ;; be obtained with `sregex-info'; so if you precompute your sregexes
114 ;; and you plan to use `replace-match' or one of the others with it,
115 ;; you need to record the info object for later use:
116
117 ;; (let* ((regex (sregexq (opt "resent-")
118 ;; (group (or "to" "cc" "bcc"))
119 ;; ":"))
120 ;; (regex-info (sregex-info)))
121 ;; ...
122 ;; (if (re-search-forward regex ...)
123 ;; (let ((which (sregex-match-string 1 nil regex-info)))
124 ;; ...)))
125
126 ;; In this example, `regex' is "\\(resent-\\)?\\(to\\|cc\\|bcc\\):",
127 ;; so the call to (sregex-match-string 1 ...) is automatically turned
128 ;; into a call to (match-string 2 ...).
129
130 ;; If the sregex info argument to `sregex-replace-match',
131 ;; `sregex-match-string', `sregex-match-string-no-properties',
132 ;; `sregex-match-beginning', `sregex-match-end', or
133 ;; `sregex-match-data' is omitted, the current value of (sregex-info)
134 ;; is used.
135
136 ;; You can do your own sregex submatch renumbering with
137 ;; `sregex-backref-num'.
138
139 ;; Finally, `sregex-save-match-data' is like `save-match-data' but
140 ;; also saves and restores the information maintained by
141 ;; `sregex-info'.
142
143 ;; To use this package in a Lisp program, simply (require 'sregex). 94 ;; To use this package in a Lisp program, simply (require 'sregex).
144 95
145 ;; Here are the clauses allowed in an `sregex' or `sregexq' 96 ;; Here are the clauses allowed in an `sregex' or `sregexq'
146 ;; expression: 97 ;; expression:
147 98
163 ;; - (group CLAUSE ...) 114 ;; - (group CLAUSE ...)
164 ;; Groups the given CLAUSEs using "\\(" and "\\)". 115 ;; Groups the given CLAUSEs using "\\(" and "\\)".
165 116
166 ;; - (sequence CLAUSE ...) 117 ;; - (sequence CLAUSE ...)
167 118
168 ;; Groups the given CLAUSEs; may or may not use "\\(" and "\\)". 119 ;; Groups the given CLAUSEs; may or may not use "\\(?:" and "\\)".
169 ;; Clauses groups by `sequence' do not count for purposes of 120 ;; Clauses grouped by `sequence' do not count for purposes of
170 ;; numbering backreferences. Use `sequence' in situations like 121 ;; numbering backreferences. Use `sequence' in situations like
171 ;; this: 122 ;; this:
172 123
173 ;; (sregexq (or "dog" "cat" 124 ;; (sregexq (or "dog" "cat"
174 ;; (sequence (opt "sea ") "monkey"))) 125 ;; (sequence (opt "sea ") "monkey")))
175 ;; => "dog\\|cat\\|\\(sea \\)?monkey" 126 ;; => "dog\\|cat\\|\\(?:sea \\)?monkey"
176 127
177 ;; where a single `or' alternate needs to contain multiple 128 ;; where a single `or' alternate needs to contain multiple
178 ;; subclauses. 129 ;; subclauses.
179 130
180 ;; - (backref N) 131 ;; - (backref N)
181 ;; Matches the same string previously matched by the Nth "group" in 132 ;; Matches the same string previously matched by the Nth "group" in
182 ;; the same sregex. N is a positive integer. In the resulting 133 ;; the same sregex. N is a positive integer.
183 ;; regex, N may be adjusted to account for automatically introduced
184 ;; groups.
185 134
186 ;; - (or CLAUSE ...) 135 ;; - (or CLAUSE ...)
187 ;; Matches any one of the CLAUSEs by separating them with "\\|". 136 ;; Matches any one of the CLAUSEs by separating them with "\\|".
188 137
189 ;; - (0+ CLAUSE ...) 138 ;; - (0+ CLAUSE ...)
274 ;; Where MIN and MAX are characters, adds the range of characters 223 ;; Where MIN and MAX are characters, adds the range of characters
275 ;; from MIN through MAX to the set. 224 ;; from MIN through MAX to the set.
276 225
277 ;;; To do: 226 ;;; To do:
278 227
279 ;; Make (sregexq (or "a" (sequence "b" "c"))) return "a\\|bc" instead
280 ;; of "a\\|\\(bc\\)"
281
282 ;; An earlier version of this package could optionally translate the 228 ;; An earlier version of this package could optionally translate the
283 ;; symbolic regex into other languages' syntaxes, e.g. Perl. For 229 ;; symbolic regex into other languages' syntaxes, e.g. Perl. For
284 ;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would 230 ;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would
285 ;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore 231 ;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore
286 ;; such a facility. 232 ;; such a facility.
287 233
234 ;; - handle multibyte chars in sregex--char-aux
235 ;; - add support for character classes ([:blank:], ...)
236 ;; - add support for non-greedy operators *? and +?
237 ;; - bug: (sregexq (opt (opt ?a))) returns "a??" which is a non-greedy "a?"
238
288 ;;; Bugs: 239 ;;; Bugs:
289 240
290 ;; The (regex REGEX) form can confuse the code that distinguishes
291 ;; introduced groups from user-specified groups. Try to avoid using
292 ;; grouping within a `regex' form. Failing that, try to avoid using
293 ;; backrefs if you're using `regex'.
294
295 ;;; Code: 241 ;;; Code:
296 242
297 (defsubst sregex--value-unitp (val) (nth 0 val)) 243 (eval-when-compile (require 'cl))
298 (defsubst sregex--value-groups (val) (nth 1 val)) 244
299 (defsubst sregex--value-tree (val) (nth 2 val)) 245 ;; Compatibility code for when we didn't have shy-groups
300 246 (defvar sregex--current-sregex nil)
301 (defun sregex--make-value (unitp groups tree) 247 (defun sregex-info () nil)
302 (list unitp groups tree)) 248 (defmacro sregex-save-match-data (&rest forms) (cons 'save-match-data forms))
303 249 (defun sregex-replace-match (r &optional f l str subexp x)
304 (defvar sregex--current-sregex nil 250 (replace-match r f l str subexp))
305 "Global state for `sregex-info'.") 251 (defun sregex-match-string (c &optional i x) (match-string c i))
306
307 (defun sregex-info ()
308 "Return extra information about the latest call to `sregex'.
309 This extra information is needed in order to adjust user-requested
310 backreference numbers to numbers suitable for the generated regexp.
311 See e.g. `sregex-match-string' and `sregex-backref-num'."
312 sregex--current-sregex)
313
314 ; (require 'advice)
315 ; (defadvice save-match-data (around sregex-save-match-data protect)
316 ; (let ((sregex--saved-sregex sregex--current-sregex))
317 ; (unwind-protect
318 ; ad-do-it
319 ; (setq sregex--current-sregex sregex--saved-sregex))))
320 (defmacro sregex-save-match-data (&rest forms)
321 "Like `save-match-data', but also saves and restores `sregex-info' data."
322 `(let ((sregex--saved-sregex sregex--current-sregex))
323 (unwind-protect
324 (save-match-data ,@forms)
325 (setq sregex--current-sregex sregex--saved-sregex))))
326
327 (defun sregex-replace-match (replacement
328 &optional fixedcase literal string subexp sregex)
329 "Like `replace-match', for a regexp made with `sregex'.
330 This takes one additional optional argument, the `sregex' info, which
331 can be obtained with `sregex-info'. The SUBEXP argument is adjusted
332 to allow for \"introduced groups\". If the extra argument is omitted
333 or nil, it defaults to the current value of (sregex-info)."
334 (replace-match replacement fixedcase literal string
335 (and subexp
336 (sregex-backref-num subexp sregex))))
337
338 (defun sregex-match-string (count &optional in-string sregex)
339 "Like `match-string', for a regexp made with `sregex'.
340 This takes one additional optional argument, the `sregex' info, which
341 can be obtained with `sregex-info'. The COUNT argument is adjusted to
342 allow for \"introduced groups\". If the extra argument is omitted or
343 nil, it defaults to the current value of (sregex-info)."
344 (match-string (and count
345 (sregex-backref-num count sregex))
346 in-string))
347
348 (defun sregex-match-string-no-properties (count &optional in-string sregex) 252 (defun sregex-match-string-no-properties (count &optional in-string sregex)
349 "Like `match-string-no-properties', for a regexp made with `sregex'. 253 (match-string-no-properties count in-string))
350 This takes one additional optional argument, the `sregex' info, which 254 (defun sregex-match-beginning (count &optional sregex) (match-beginning count))
351 can be obtained with `sregex-info'. The COUNT argument is adjusted to 255 (defun sregex-match-end (count &optional sregex) (match-end count))
352 allow for \"introduced groups\". If the extra argument is omitted or 256 (defun sregex-match-data (&optional sregex) (match-data))
353 nil, it defaults to the current value of (sregex-info)." 257 (defun sregex-backref-num (n &optional sregex) n)
354 (match-string-no-properties 258
355 (and count
356 (sregex-backref-num count sregex))
357 in-string))
358
359 (defun sregex-match-beginning (count &optional sregex)
360 "Like `match-beginning', for a regexp made with `sregex'.
361 This takes one additional optional argument, the `sregex' info, which
362 can be obtained with `sregex-info'. The COUNT argument is adjusted to
363 allow for \"introduced groups\". If the extra argument is omitted or
364 nil, it defaults to the current value of (sregex-info)."
365 (match-beginning (sregex-backref-num count sregex)))
366
367 (defun sregex-match-end (count &optional sregex)
368 "Like `match-end', for a regexp made with `sregex'.
369 This takes one additional optional argument, the `sregex' info, which
370 can be obtained with `sregex-info'. The COUNT argument is adjusted to
371 allow for \"introduced groups\". If the extra argument is omitted or
372 nil, it defaults to the current value of (sregex-info)."
373 (match-end (sregex-backref-num count sregex)))
374
375 (defun sregex-match-data (&optional sregex)
376 "Like `match-data', for a regexp made with `sregex'.
377 This takes one additional optional argument, the `sregex' info, which
378 can be obtained with `sregex-info'. \"Introduced groups\" are removed
379 from the result. If the extra argument is omitted or nil, it defaults
380 to the current value of (sregex-info)."
381 (let* ((data (match-data))
382 (groups (sregex--value-groups (or sregex
383 sregex--current-sregex)))
384 (result (list (car (cdr data))
385 (car data))))
386 (setq data (cdr (cdr data)))
387 (while data
388 (if (car groups)
389 (setq result (append (list (car (cdr data))
390 (car data))
391 result)))
392 (setq groups (cdr groups)
393 data (cdr (cdr data))))
394 (reverse result)))
395
396 (defun sregex--render-tree (tree sregex)
397 (let ((key (car tree)))
398 (cond ((eq key 'str)
399 (cdr tree))
400 ((eq key 'or)
401 (mapconcat '(lambda (x)
402 (sregex--render-tree x sregex))
403 (cdr tree)
404 "\\|"))
405 ((eq key 'sequence)
406 (apply 'concat
407 (mapcar '(lambda (x)
408 (sregex--render-tree x sregex))
409 (cdr tree))))
410 ((eq key 'group)
411 (concat "\\("
412 (sregex--render-tree (cdr tree) sregex)
413 "\\)"))
414 ((eq key 'opt)
415 (concat (sregex--render-tree (cdr tree) sregex)
416 "?"))
417 ((eq key '0+)
418 (concat (sregex--render-tree (cdr tree) sregex)
419 "*"))
420 ((eq key '1+)
421 (concat (sregex--render-tree (cdr tree) sregex)
422 "+"))
423 ((eq key 'backref)
424 (let ((num (sregex-backref-num (cdr tree) sregex)))
425 (if (> num 9)
426 (error "sregex: backref number %d too high after adjustment"
427 num)
428 (concat "\\" (int-to-string num)))))
429 (t (error "sregex internal error: unknown tree type %S"
430 key)))))
431 259
432 (defun sregex (&rest exps) 260 (defun sregex (&rest exps)
433 "Symbolic regular expression interpreter. 261 "Symbolic regular expression interpreter.
434 This is exactly like `sregexq' (q.v.) except that it evaluates all its 262 This is exactly like `sregexq' (q.v.) except that it evaluates all its
435 arguments, so literal sregex clauses must be quoted. For example: 263 arguments, so literal sregex clauses must be quoted. For example:
441 269
442 (let ((dotstar '(0+ any)) 270 (let ((dotstar '(0+ any))
443 (whitespace '(1+ (syntax ?-))) 271 (whitespace '(1+ (syntax ?-)))
444 (digits '(1+ (char (?0 . ?9))))) 272 (digits '(1+ (char (?0 . ?9)))))
445 (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\"" 273 (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\""
446 (progn 274 (sregex--sequence exps nil))
447 (setq sregex--current-sregex (sregex--sequence exps nil))
448 (sregex--render-tree (sregex--value-tree sregex--current-sregex)
449 sregex--current-sregex)))
450 275
451 (defmacro sregexq (&rest exps) 276 (defmacro sregexq (&rest exps)
452 "Symbolic regular expression interpreter. 277 "Symbolic regular expression interpreter.
453 This macro allows you to specify a regular expression (regexp) in 278 This macro allows you to specify a regular expression (regexp) in
454 symbolic form, and converts it into the string form required by Emacs's 279 symbolic form, and converts it into the string form required by Emacs's
544 Groups the given CLAUSEs using \"\\\\(\" and \"\\\\)\". 369 Groups the given CLAUSEs using \"\\\\(\" and \"\\\\)\".
545 370
546 - (sequence CLAUSE ...) 371 - (sequence CLAUSE ...)
547 372
548 Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\". 373 Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\".
549 Clauses groups by `sequence' do not count for purposes of 374 Clauses grouped by `sequence' do not count for purposes of
550 numbering backreferences. Use `sequence' in situations like 375 numbering backreferences. Use `sequence' in situations like
551 this: 376 this:
552 377
553 (sregexq (or \"dog\" \"cat\" 378 (sregexq (or \"dog\" \"cat\"
554 (sequence (opt \"sea \") \"monkey\"))) 379 (sequence (opt \"sea \") \"monkey\")))
555 => \"dog\\\\|cat\\\\|\\\\(sea \\\\)?monkey\" 380 => \"dog\\\\|cat\\\\|\\\\(?:sea \\\\)?monkey\"
556 381
557 where a single `or' alternate needs to contain multiple 382 where a single `or' alternate needs to contain multiple
558 subclauses. 383 subclauses.
559 384
560 - (backref N) 385 - (backref N)
561 Matches the same string previously matched by the Nth \"group\" in 386 Matches the same string previously matched by the Nth \"group\" in
562 the same sregex. N is a positive integer. In the resulting 387 the same sregex. N is a positive integer.
563 regex, N may be adjusted to account for automatically introduced
564 groups.
565 388
566 - (or CLAUSE ...) 389 - (or CLAUSE ...)
567 Matches any one of the CLAUSEs by separating them with \"\\\\|\". 390 Matches any one of the CLAUSEs by separating them with \"\\\\|\".
568 391
569 - (0+ CLAUSE ...) 392 - (0+ CLAUSE ...)
637 460
638 - (regex REGEX) 461 - (regex REGEX)
639 This is a \"trapdoor\" for including ordinary regular expression 462 This is a \"trapdoor\" for including ordinary regular expression
640 strings in the result. Some regular expressions are clearer when 463 strings in the result. Some regular expressions are clearer when
641 written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for 464 written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for
642 instance. However, using this can confuse the code that 465 instance.
643 distinguishes introduced groups from user-specified groups. Avoid
644 using grouping within a `regex' form. Failing that, avoid using
645 backrefs if you're using `regex'.
646 466
647 Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...) 467 Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
648 has one of the following forms: 468 has one of the following forms:
649 469
650 - a character 470 - a character
657 Where MIN and MAX are characters, adds the range of characters 477 Where MIN and MAX are characters, adds the range of characters
658 from MIN through MAX to the set." 478 from MIN through MAX to the set."
659 `(apply 'sregex ',exps)) 479 `(apply 'sregex ',exps))
660 480
661 (defun sregex--engine (exp combine) 481 (defun sregex--engine (exp combine)
662 (let* ((val (cond ((stringp exp) 482 (cond
663 (sregex--make-value (or (not (eq combine 'suffix)) 483 ((stringp exp)
664 (= (length exp) 1)) 484 (if (and combine
665 nil 485 (eq combine 'suffix)
666 (cons 'str 486 (/= (length exp) 1))
667 (regexp-quote exp)))) 487 (concat "\\(?:" (regexp-quote exp) "\\)")
668 ((symbolp exp) 488 (regexp-quote exp)))
669 (funcall (intern (concat "sregex--" 489 ((symbolp exp)
670 (symbol-name exp))) 490 (ecase exp
671 combine)) 491 (any ".")
672 ((consp exp) 492 (bol "^")
673 (funcall (intern (concat "sregex--" 493 (eol "$")
674 (symbol-name (car exp)))) 494 (wordchar "\\w")
675 (cdr exp) 495 (not-wordchar "\\W")
676 combine)) 496 (bot "\\`")
677 (t (error "Invalid expression: %s" exp)))) 497 (eot "\\'")
678 (unitp (sregex--value-unitp val)) 498 (point "\\=")
679 (groups (sregex--value-groups val)) 499 (word-boundary "\\b")
680 (tree (sregex--value-tree val))) 500 (not-word-boundary "\\B")
681 (if (and combine (not unitp)) 501 (bow "\\<")
682 (sregex--make-value t 502 (eow "\\>")))
683 (cons nil groups) 503 ((consp exp)
684 (cons 'group tree)) 504 (funcall (intern (concat "sregex--"
685 (sregex--make-value unitp groups tree)))) 505 (symbol-name (car exp))))
506 (cdr exp)
507 combine))
508 (t (error "Invalid expression: %s" exp))))
686 509
687 (defun sregex--sequence (exps combine) 510 (defun sregex--sequence (exps combine)
688 (if (= (length exps) 1) 511 (if (= (length exps) 1) (sregex--engine (car exps) combine)
689 (sregex--engine (car exps) combine) 512 (let ((re (mapconcat
690 (let ((groups nil) 513 (lambda (e) (sregex--engine e 'concat))
691 (trees nil)) ;grows in reverse 514 exps "")))
692 (while exps
693 (let ((val (sregex--engine (car exps) 'concat)))
694 (setq groups (append groups
695 (sregex--value-groups val))
696 trees (cons (sregex--value-tree val) trees)
697 exps (cdr exps))))
698 (setq trees (nreverse trees))
699 (if (eq combine 'suffix) 515 (if (eq combine 'suffix)
700 (sregex--make-value t 516 (concat "\\(?:" re "\\)")
701 (cons nil groups) 517 re))))
702 (cons 'group 518
703 (cons 'sequence trees))) 519 (defun sregex--or (exps combine)
704 (sregex--make-value (not (eq combine 'suffix)) 520 (if (= (length exps) 1) (sregex--engine (car exps) combine)
705 groups 521 (let ((re (mapconcat
706 (cons 'sequence trees)))))) 522 (lambda (e) (sregex--engine e 'or))
707 523 exps "\\|")))
708 (defun sregex--group (exps combine) 524 (if (not (eq combine 'or))
709 (let ((val (sregex--sequence exps nil))) 525 (concat "\\(?:" re "\\)")
710 (sregex--make-value t 526 re))))
711 (cons t (sregex--value-groups val)) 527
712 (cons 'group (sregex--value-tree val))))) 528 (defun sregex--group (exps combine) (concat "\\(" (sregex--sequence exps nil) "\\)"))
713 529
714 (defun sregex-backref-num (n &optional sregex) 530 (defun sregex--backref (exps combine) (concat "\\" (int-to-string (car exps))))
715 "Adjust backreference number N according to SREGEX. 531 (defun sregex--opt (exps combine) (concat (sregex--sequence exps 'suffix) "?"))
716 When `sregex' introduces parenthesized groups that the user didn't ask 532 (defun sregex--0+ (exps combine) (concat (sregex--sequence exps 'suffix) "*"))
717 for, the numbering of the groups that the user *did* ask for gets all 533 (defun sregex--1+ (exps combine) (concat (sregex--sequence exps 'suffix) "+"))
718 out of whack. This function accounts for introduced groups. Example: 534
719 535 (defun sregex--char (exps combine) (sregex--char-aux nil exps))
720 (sregexq (opt \"ab\") 536 (defun sregex--not-char (exps combine) (sregex--char-aux t exps))
721 (group (or \"c\" \"d\"))) => \"\\\\(ab\\\\)?\\\\(c\\\\|d\\\\)\" 537
722 (setq info (sregex-info)) 538 (defun sregex--syntax (exps combine) (format "\\s%c" (car exps)))
723 (sregex-backref-num 1 info) => 2 539 (defun sregex--not-syntax (exps combine) (format "\\S%c" (car exps)))
724 540
725 The SREGEX parameter is optional and defaults to the current value of 541 (defun sregex--regex (exps combine)
726 `sregex-info'." 542 (if combine (concat "\\(?:" (car exps) "\\)") (car exps)))
727 (let ((groups (sregex--value-groups (or sregex
728 sregex--current-sregex)))
729 (result 0))
730 (while (and groups (> n 0))
731 (if (car groups)
732 (setq n (1- n)))
733 (setq result (1+ result)
734 groups (cdr groups)))
735 result))
736
737 (defun sregex--backref (exps combine)
738 (sregex--make-value t nil (cons 'backref (car exps))))
739
740 (defun sregex--any (combine)
741 (sregex--make-value t nil '(str . ".")))
742
743 (defun sregex--opt (exps combine)
744 (let ((val (sregex--sequence exps 'suffix)))
745 (sregex--make-value t
746 (sregex--value-groups val)
747 (cons 'opt (sregex--value-tree val)))))
748
749 (defun sregex--0+ (exps combine)
750 (let ((val (sregex--sequence exps 'suffix)))
751 (sregex--make-value t
752 (sregex--value-groups val)
753 (cons '0+ (sregex--value-tree val)))))
754 (defun sregex--1+ (exps combine)
755 (let ((val (sregex--sequence exps 'suffix)))
756 (sregex--make-value t
757 (sregex--value-groups val)
758 (cons '1+ (sregex--value-tree val)))))
759 543
760 (defun sregex--repeat (exps combine) 544 (defun sregex--repeat (exps combine)
761 (let ((min (or (car exps) 0)) 545 (let* ((min (or (pop exps) 0))
762 (max (car (cdr exps)))) 546 (minstr (number-to-string min))
763 (setq exps (cdr (cdr exps))) 547 (max (pop exps)))
764 (cond ((zerop min) 548 (concat (sregex--sequence exps 'suffix)
765 (cond ((equal max 0) ;degenerate 549 (concat "\\{" minstr ","
766 (sregex--make-value t nil nil)) 550 (when max (number-to-string max)) "\\}"))))
767 ((equal max 1) 551
768 (sregex--opt exps combine)) 552 (defun sregex--char-range (start end)
769 ((not max) 553 (let ((startc (char-to-string start))
770 (sregex--0+ exps combine)) 554 (endc (char-to-string end)))
771 (t (sregex--sequence (make-list max 555 (cond
772 (cons 'opt exps)) 556 ((> end (+ start 2)) (concat startc "-" endc))
773 combine)))) 557 ((> end (+ start 1)) (concat startc (char-to-string (1+ start)) endc))
774 ((= min 1) 558 ((> end start) (concat startc endc))
775 (cond ((equal max 1) 559 (t startc))))
776 (sregex--sequence exps combine))
777 ((not max)
778 (sregex--1+ exps combine))
779 (t (sregex--sequence (append exps
780 (make-list (1- max)
781 (cons 'opt exps)))
782 combine))))
783 (t (sregex--sequence (append exps
784 (list (append (list 'repeat
785 (1- min)
786 (and max
787 (1- max)))
788 exps)))
789 combine)))))
790
791 (defun sregex--or (exps combine)
792 (if (= (length exps) 1)
793 (sregex--engine (car exps) combine)
794 (let ((groups nil)
795 (trees nil))
796 (while exps
797 (let ((val (sregex--engine (car exps) 'or)))
798 (setq groups (append groups
799 (sregex--value-groups val))
800 trees (cons (sregex--value-tree val) trees)
801 exps (cdr exps))))
802 (sregex--make-value (eq combine 'or)
803 groups
804 (cons 'or (nreverse trees))))))
805
806 (defmacro sregex--char-range-aux ()
807 '(if start
808 (let (startc endc)
809 (if (and (<= 32 start)
810 (<= start 127))
811 (setq startc (char-to-string start)
812 endc (char-to-string end))
813 (setq startc (format "\\%03o" start)
814 endc (format "\\%03o" end)))
815 (if (> end start)
816 (if (> end (+ start 1))
817 (setq class (concat class startc "-" endc))
818 (setq class (concat class startc endc)))
819 (setq class (concat class startc))))))
820
821 (defmacro sregex--char-range (rstart rend)
822 `(let ((i ,rstart)
823 start end)
824 (while (<= i ,rend)
825 (if (aref chars i)
826 (progn
827 (if start
828 (setq end i)
829 (setq start i
830 end i))
831 (aset chars i nil))
832 (sregex--char-range-aux)
833 (setq start nil
834 end nil))
835 (setq i (1+ i)))
836 (sregex--char-range-aux)))
837 560
838 (defun sregex--char-aux (complement args) 561 (defun sregex--char-aux (complement args)
839 (let ((chars (make-vector 256 nil))) 562 ;; regex-opt does the same, we should join effort.
840 (while args 563 (let ((chars (make-bool-vector 256 nil))) ; Yeah, right!
841 (let ((arg (car args))) 564 (dolist (arg args)
842 (cond ((integerp arg) 565 (cond ((integerp arg) (aset chars arg t))
843 (aset chars arg t)) 566 ((stringp arg) (mapcar (lambda (c) (aset chars c t)) arg))
844 ((stringp arg) 567 ((consp arg)
845 (mapcar (function 568 (let ((start (car arg))
846 (lambda (c) 569 (end (cdr arg)))
847 (aset chars c t))) 570 (when (> start end)
848 arg)) 571 (let ((tmp start)) (setq start end) (setq end tmp)))
849 ((consp arg) 572 ;; now start <= end
850 (let ((start (car arg)) 573 (let ((i start))
851 (end (cdr arg))) 574 (while (<= i end)
852 (if (> start end) 575 (aset chars i t)
853 (let ((tmp start)) 576 (setq i (1+ i))))))))
854 (setq start end
855 end tmp)))
856 ;; now start <= end
857 (let ((i start))
858 (while (<= i end)
859 (aset chars i t)
860 (setq i (1+ i))))))))
861 (setq args (cdr args)))
862 ;; now chars is a map of the characters in the class 577 ;; now chars is a map of the characters in the class
863 (let ((class "") 578 (let ((caret (aref chars ?^))
864 (caret (aref chars ?^))) 579 (dash (aref chars ?-))
580 (class (if (aref chars ?\]) "]" "")))
865 (aset chars ?^ nil) 581 (aset chars ?^ nil)
866 (if (aref chars ?\]) 582 (aset chars ?- nil)
867 (progn 583 (aset chars ?\] nil)
868 (setq class (concat class "]")) 584
869 (aset chars ?\] nil))) 585 (let (start end)
870 (if (aref chars ?-) 586 (dotimes (i 256)
871 (progn 587 (if (aref chars i)
872 (setq class (concat class "-")) 588 (progn
873 (aset chars ?- nil))) 589 (unless start (setq start i))
874 (if (aref chars ?\\) 590 (setq end i)
875 (progn 591 (aset chars i nil))
876 (setq class (concat class "\\\\")) 592 (when start
877 (aset chars ?\\ nil))) 593 (setq class (concat class (sregex--char-range start end)))
878 594 (setq start nil))))
879 (sregex--char-range ?A ?Z) 595 (if start
880 (sregex--char-range ?a ?z) 596 (setq class (concat class (sregex--char-range start end)))))
881 (sregex--char-range ?0 ?9) 597
882 598 (if (> (length class) 0)
883 (let ((i 32)) 599 (setq class (concat class (if caret "^") (if dash "-")))
884 (while (< i 128) 600 (setq class (concat class (if dash "-") (if caret "^"))))
885 (if (aref chars i) 601 (if (and (not complement) (= (length class) 1))
886 (progn 602 (regexp-quote class)
887 (setq class (concat class (char-to-string i))) 603 (concat "[" (if complement "^") class "]")))))
888 (aset chars i nil)))
889 (setq i (1+ i))))
890
891 (sregex--char-range 0 31)
892 (sregex--char-range 128 255)
893
894 (let ((i 0))
895 (while (< i 256)
896 (if (aref chars i)
897 (setq class (concat class (format "\\%03o" i))))
898 (setq i (1+ i))))
899
900 (if caret
901 (setq class (concat class "^")))
902 (concat "[" (if complement "^") class "]"))))
903
904 (defun sregex--char (exps combine)
905 (sregex--make-value t nil (cons 'str (sregex--char-aux nil exps))))
906 (defun sregex--not-char (exps combine)
907 (sregex--make-value t nil (cons 'str (sregex--char-aux t exps))))
908
909 (defun sregex--bol (combine)
910 (sregex--make-value t nil '(str . "^")))
911 (defun sregex--eol (combine)
912 (sregex--make-value t nil '(str . "$")))
913
914 (defun sregex--wordchar (combine)
915 (sregex--make-value t nil '(str . "\\w")))
916 (defun sregex--not-wordchar (combine)
917 (sregex--make-value t nil '(str . "\\W")))
918
919 (defun sregex--syntax (exps combine)
920 (sregex--make-value t nil (cons 'str (format "\\s%c" (car exps)))))
921 (defun sregex--not-syntax (exps combine)
922 (sregex--make-value t nil (cons 'str (format "\\S%c" (car exps)))))
923
924 (defun sregex--bot (combine)
925 (sregex--make-value t nil (cons 'str "\\`")))
926 (defun sregex--eot (combine)
927 (sregex--make-value t nil (cons 'str "\\'")))
928
929 (defun sregex--point (combine)
930 (sregex--make-value t nil '(str . "\\=")))
931
932 (defun sregex--word-boundary (combine)
933 (sregex--make-value t nil '(str . "\\b")))
934 (defun sregex--not-word-boundary (combine)
935 (sregex--make-value t nil '(str . "\\B")))
936
937 (defun sregex--bow (combine)
938 (sregex--make-value t nil '(str . "\\<")))
939 (defun sregex--eow (combine)
940 (sregex--make-value t nil '(str . "\\>")))
941
942
943 ;; trapdoor - usage discouraged
944 (defun sregex--regex (exps combine)
945 (sregex--make-value nil nil (car exps)))
946 604
947 (provide 'sregex) 605 (provide 'sregex)
948 606
949 ;;; sregex.el ends here 607 ;;; sregex.el ends here
950 608