Mercurial > emacs
annotate lisp/emacs-lisp/sregex.el @ 54736:b94de166de9d
(ethio-sera-being-called-by-w3): New
variable.
(ethio-sera-to-fidel-ethio): Check ethio-sera-being-called-by-w3
instead of sera-being-called-by-w3.
(ethio-fidel-to-sera-buffer): Likewise.
(ethio-find-file): Bind ethio-sera-being-called-by-w3 to t
instead of sera-being-called-by-w3.
(ethio-write-file): Likewise.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Mon, 05 Apr 2004 23:27:37 +0000 |
| parents | 695cf19ef79e |
| children | 18a818a2ee7c 375f2633d815 |
| rev | line source |
|---|---|
| 22537 | 1 ;;; sregex.el --- symbolic regular expressions |
| 2 | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
3 ;; Copyright (C) 1997, 1998, 2000 Free Software Foundation, Inc. |
| 22537 | 4 |
| 5 ;; Author: Bob Glickstein <bobg+sregex@zanshin.com> | |
| 6 ;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com> | |
| 29212 | 7 ;; Keywords: extensions |
| 22537 | 8 |
| 9 ;; This file is part of GNU Emacs. | |
| 10 | |
| 11 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
| 12 ;; it under the terms of the GNU General Public License as published by | |
| 13 ;; the Free Software Foundation; either version 2, or (at your option) | |
| 14 ;; any later version. | |
| 15 | |
| 16 ;; GNU Emacs is distributed in the hope that it will be useful, | |
| 17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 19 ;; GNU General Public License for more details. | |
| 20 | |
| 21 ;; You should have received a copy of the GNU General Public License | |
| 22 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
| 23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
| 24 ;; Boston, MA 02111-1307, USA. | |
| 25 | |
| 26 ;;; Commentary: | |
| 27 | |
| 28 ;; This package allows you to write regular expressions using a | |
| 29 ;; totally new, Lisp-like syntax. | |
| 30 | |
| 31 ;; A "symbolic regular expression" (sregex for short) is a Lisp form | |
| 32 ;; that, when evaluated, produces the string form of the specified | |
| 33 ;; regular expression. Here's a simple example: | |
| 34 | |
| 35 ;; (sregexq (or "Bob" "Robert")) => "Bob\\|Robert" | |
| 36 | |
| 37 ;; As you can see, an sregex is specified by placing one or more | |
| 38 ;; special clauses in a call to `sregexq'. The clause in this case is | |
| 39 ;; the `or' of two strings (not to be confused with the Lisp function | |
| 40 ;; `or'). The list of allowable clauses appears below. | |
| 41 | |
| 42 ;; With sregex, it is never necessary to "escape" magic characters | |
| 43 ;; that are meant to be taken literally; that happens automatically. | |
| 44 ;; For example: | |
| 45 | |
| 46 ;; (sregexq "M*A*S*H") => "M\\*A\\*S\\*H" | |
| 47 | |
| 48 ;; It is also unnecessary to "group" parts of the expression together | |
| 49 ;; to overcome operator precedence; that also happens automatically. | |
| 50 ;; For example: | |
| 51 | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
52 ;; (sregexq (opt (or "Bob" "Robert"))) => "\\(?:Bob\\|Robert\\)?" |
| 22537 | 53 |
| 54 ;; It *is* possible to group parts of the expression in order to refer | |
| 55 ;; to them with numbered backreferences: | |
| 56 | |
| 57 ;; (sregexq (group (or "Go" "Run")) | |
| 58 ;; ", Spot, " | |
| 59 ;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1" | |
| 60 | |
| 61 ;; `sregexq' is a macro. Each time it is used, it constructs a simple | |
| 62 ;; Lisp expression that then invokes a moderately complex engine to | |
| 63 ;; interpret the sregex and render the string form. Because of this, | |
| 64 ;; I don't recommend sprinkling calls to `sregexq' throughout your | |
| 65 ;; code, the way one normally does with string regexes (which are | |
| 66 ;; cheap to evaluate). Instead, it's wiser to precompute the regexes | |
| 67 ;; you need wherever possible instead of repeatedly constructing the | |
| 68 ;; same ones over and over. Example: | |
| 69 | |
| 70 ;; (let ((field-regex (sregexq (opt "resent-") | |
| 71 ;; (or "to" "cc" "bcc")))) | |
| 72 ;; ... | |
| 73 ;; (while ... | |
| 74 ;; ... | |
| 75 ;; (re-search-forward field-regex ...) | |
| 76 ;; ...)) | |
| 77 | |
| 78 ;; The arguments to `sregexq' are automatically quoted, but the | |
| 79 ;; flipside of this is that it is not straightforward to include | |
| 80 ;; computed (i.e., non-constant) values in `sregexq' expressions. So | |
| 81 ;; `sregex' is a function that is like `sregexq' but which does not | |
| 82 ;; automatically quote its values. Literal sregex clauses must be | |
| 83 ;; explicitly quoted like so: | |
| 84 | |
| 85 ;; (sregex '(or "Bob" "Robert")) => "Bob\\|Robert" | |
| 86 | |
| 87 ;; but computed clauses can be included easily, allowing for the reuse | |
| 88 ;; of common clauses: | |
| 89 | |
| 90 ;; (let ((dotstar '(0+ any)) | |
| 91 ;; (whitespace '(1+ (syntax ?-))) | |
| 92 ;; (digits '(1+ (char (?0 . ?9))))) | |
| 93 ;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+" | |
| 94 | |
| 95 ;; To use this package in a Lisp program, simply (require 'sregex). | |
| 96 | |
| 97 ;; Here are the clauses allowed in an `sregex' or `sregexq' | |
| 98 ;; expression: | |
| 99 | |
| 100 ;; - a string | |
| 101 ;; This stands for the literal string. If it contains | |
| 102 ;; metacharacters, they will be escaped in the resulting regex | |
| 103 ;; (using `regexp-quote'). | |
| 104 | |
| 105 ;; - the symbol `any' | |
| 106 ;; This stands for ".", a regex matching any character except | |
| 107 ;; newline. | |
| 108 | |
| 109 ;; - the symbol `bol' | |
| 110 ;; Stands for "^", matching the empty string at the beginning of a line | |
| 111 | |
| 112 ;; - the symbol `eol' | |
| 113 ;; Stands for "$", matching the empty string at the end of a line | |
| 114 | |
| 115 ;; - (group CLAUSE ...) | |
| 116 ;; Groups the given CLAUSEs using "\\(" and "\\)". | |
| 117 | |
| 118 ;; - (sequence CLAUSE ...) | |
| 119 | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
120 ;; Groups the given CLAUSEs; may or may not use "\\(?:" and "\\)". |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
121 ;; Clauses grouped by `sequence' do not count for purposes of |
| 22537 | 122 ;; numbering backreferences. Use `sequence' in situations like |
| 123 ;; this: | |
| 124 | |
| 125 ;; (sregexq (or "dog" "cat" | |
| 126 ;; (sequence (opt "sea ") "monkey"))) | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
127 ;; => "dog\\|cat\\|\\(?:sea \\)?monkey" |
| 22537 | 128 |
| 129 ;; where a single `or' alternate needs to contain multiple | |
| 130 ;; subclauses. | |
| 131 | |
| 132 ;; - (backref N) | |
| 133 ;; Matches the same string previously matched by the Nth "group" in | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
134 ;; the same sregex. N is a positive integer. |
| 22537 | 135 |
| 136 ;; - (or CLAUSE ...) | |
| 137 ;; Matches any one of the CLAUSEs by separating them with "\\|". | |
| 138 | |
| 139 ;; - (0+ CLAUSE ...) | |
| 140 ;; Concatenates the given CLAUSEs and matches zero or more | |
| 141 ;; occurrences by appending "*". | |
| 142 | |
| 143 ;; - (1+ CLAUSE ...) | |
| 144 ;; Concatenates the given CLAUSEs and matches one or more | |
| 145 ;; occurrences by appending "+". | |
| 146 | |
| 147 ;; - (opt CLAUSE ...) | |
| 148 ;; Concatenates the given CLAUSEs and matches zero or one occurrence | |
| 149 ;; by appending "?". | |
| 150 | |
| 151 ;; - (repeat MIN MAX CLAUSE ...) | |
| 152 ;; Concatenates the given CLAUSEs and constructs a regex matching at | |
| 153 ;; least MIN occurrences and at most MAX occurrences. MIN must be a | |
| 154 ;; non-negative integer. MAX must be a non-negative integer greater | |
| 155 ;; than or equal to MIN; or MAX can be nil to mean "infinity." | |
| 156 | |
| 157 ;; - (char CHAR-CLAUSE ...) | |
| 158 ;; Creates a "character class" matching one character from the given | |
| 159 ;; set. See below for how to construct a CHAR-CLAUSE. | |
| 160 | |
| 161 ;; - (not-char CHAR-CLAUSE ...) | |
| 162 ;; Creates a "character class" matching any one character not in the | |
| 163 ;; given set. See below for how to construct a CHAR-CLAUSE. | |
| 164 | |
| 165 ;; - the symbol `bot' | |
| 166 ;; Stands for "\\`", matching the empty string at the beginning of | |
| 167 ;; text (beginning of a string or of a buffer). | |
| 168 | |
| 169 ;; - the symbol `eot' | |
| 170 ;; Stands for "\\'", matching the empty string at the end of text. | |
| 171 | |
| 172 ;; - the symbol `point' | |
| 173 ;; Stands for "\\=", matching the empty string at point. | |
| 174 | |
| 175 ;; - the symbol `word-boundary' | |
| 176 ;; Stands for "\\b", matching the empty string at the beginning or | |
| 177 ;; end of a word. | |
| 178 | |
| 179 ;; - the symbol `not-word-boundary' | |
| 180 ;; Stands for "\\B", matching the empty string not at the beginning | |
| 181 ;; or end of a word. | |
| 182 | |
| 183 ;; - the symbol `bow' | |
| 184 ;; Stands for "\\<", matching the empty string at the beginning of a | |
| 185 ;; word. | |
| 186 | |
| 187 ;; - the symbol `eow' | |
| 188 ;; Stands for "\\>", matching the empty string at the end of a word. | |
| 189 | |
| 190 ;; - the symbol `wordchar' | |
| 191 ;; Stands for the regex "\\w", matching a word-constituent character | |
| 192 ;; (as determined by the current syntax table) | |
| 193 | |
| 194 ;; - the symbol `not-wordchar' | |
| 195 ;; Stands for the regex "\\W", matching a non-word-constituent | |
| 196 ;; character. | |
| 197 | |
| 198 ;; - (syntax CODE) | |
| 199 ;; Stands for the regex "\\sCODE", where CODE is a syntax table code | |
| 200 ;; (a single character). Matches any character with the requested | |
| 201 ;; syntax. | |
| 202 | |
| 203 ;; - (not-syntax CODE) | |
| 204 ;; Stands for the regex "\\SCODE", where CODE is a syntax table code | |
| 205 ;; (a single character). Matches any character without the | |
| 206 ;; requested syntax. | |
| 207 | |
| 208 ;; - (regex REGEX) | |
| 209 ;; This is a "trapdoor" for including ordinary regular expression | |
| 210 ;; strings in the result. Some regular expressions are clearer when | |
| 211 ;; written the old way: "[a-z]" vs. (sregexq (char (?a . ?z))), for | |
| 212 ;; instance. However, see the note under "Bugs," below. | |
| 213 | |
| 214 ;; Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...) | |
| 215 ;; has one of the following forms: | |
| 216 | |
| 217 ;; - a character | |
| 218 ;; Adds that character to the set. | |
| 219 | |
| 220 ;; - a string | |
| 221 ;; Adds all the characters in the string to the set. | |
| 222 | |
| 223 ;; - A pair (MIN . MAX) | |
| 224 ;; Where MIN and MAX are characters, adds the range of characters | |
| 225 ;; from MIN through MAX to the set. | |
| 226 | |
| 227 ;;; To do: | |
| 228 | |
| 229 ;; An earlier version of this package could optionally translate the | |
| 230 ;; symbolic regex into other languages' syntaxes, e.g. Perl. For | |
| 231 ;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would | |
| 232 ;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore | |
| 233 ;; such a facility. | |
| 234 | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
235 ;; - handle multibyte chars in sregex--char-aux |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
236 ;; - add support for character classes ([:blank:], ...) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
237 ;; - add support for non-greedy operators *? and +? |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
238 ;; - bug: (sregexq (opt (opt ?a))) returns "a??" which is a non-greedy "a?" |
| 22537 | 239 |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
240 ;;; Bugs: |
| 22537 | 241 |
| 242 ;;; Code: | |
| 243 | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
244 (eval-when-compile (require 'cl)) |
| 22537 | 245 |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
246 ;; Compatibility code for when we didn't have shy-groups |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
247 (defvar sregex--current-sregex nil) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
248 (defun sregex-info () nil) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
249 (defmacro sregex-save-match-data (&rest forms) (cons 'save-match-data forms)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
250 (defun sregex-replace-match (r &optional f l str subexp x) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
251 (replace-match r f l str subexp)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
252 (defun sregex-match-string (c &optional i x) (match-string c i)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
253 (defun sregex-match-string-no-properties (count &optional in-string sregex) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
254 (match-string-no-properties count in-string)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
255 (defun sregex-match-beginning (count &optional sregex) (match-beginning count)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
256 (defun sregex-match-end (count &optional sregex) (match-end count)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
257 (defun sregex-match-data (&optional sregex) (match-data)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
258 (defun sregex-backref-num (n &optional sregex) n) |
| 22537 | 259 |
| 260 | |
| 261 (defun sregex (&rest exps) | |
| 262 "Symbolic regular expression interpreter. | |
| 263 This is exactly like `sregexq' (q.v.) except that it evaluates all its | |
| 264 arguments, so literal sregex clauses must be quoted. For example: | |
| 265 | |
| 266 (sregex '(or \"Bob\" \"Robert\")) => \"Bob\\\\|Robert\" | |
| 267 | |
| 268 An argument-evaluating sregex interpreter lets you reuse sregex | |
| 269 subexpressions: | |
| 270 | |
| 271 (let ((dotstar '(0+ any)) | |
| 272 (whitespace '(1+ (syntax ?-))) | |
| 273 (digits '(1+ (char (?0 . ?9))))) | |
| 274 (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\"" | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
275 (sregex--sequence exps nil)) |
| 22537 | 276 |
| 277 (defmacro sregexq (&rest exps) | |
| 278 "Symbolic regular expression interpreter. | |
| 279 This macro allows you to specify a regular expression (regexp) in | |
| 280 symbolic form, and converts it into the string form required by Emacs's | |
| 281 regex functions such as `re-search-forward' and `looking-at'. Here is | |
| 282 a simple example: | |
| 283 | |
| 284 (sregexq (or \"Bob\" \"Robert\")) => \"Bob\\\\|Robert\" | |
| 285 | |
| 286 As you can see, an sregex is specified by placing one or more special | |
| 287 clauses in a call to `sregexq'. The clause in this case is the `or' | |
| 288 of two strings (not to be confused with the Lisp function `or'). The | |
| 289 list of allowable clauses appears below. | |
| 290 | |
| 291 With `sregex', it is never necessary to \"escape\" magic characters | |
| 292 that are meant to be taken literally; that happens automatically. | |
| 293 For example: | |
| 294 | |
| 295 (sregexq \"M*A*S*H\") => \"M\\\\*A\\\\*S\\\\*H\" | |
| 296 | |
| 297 It is also unnecessary to \"group\" parts of the expression together | |
| 298 to overcome operator precedence; that also happens automatically. | |
| 299 For example: | |
| 300 | |
| 301 (sregexq (opt (or \"Bob\" \"Robert\"))) => \"\\\\(Bob\\\\|Robert\\\\)?\" | |
| 302 | |
| 303 It *is* possible to group parts of the expression in order to refer | |
| 304 to them with numbered backreferences: | |
| 305 | |
| 306 (sregexq (group (or \"Go\" \"Run\")) | |
| 307 \", Spot, \" | |
| 308 (backref 1)) => \"\\\\(Go\\\\|Run\\\\), Spot, \\\\1\" | |
| 309 | |
| 310 If `sregexq' needs to introduce its own grouping parentheses, it will | |
| 311 automatically renumber your backreferences: | |
| 312 | |
| 313 (sregexq (opt \"resent-\") | |
| 314 (group (or \"to\" \"cc\" \"bcc\")) | |
| 315 \": \" | |
| 316 (backref 1)) => \"\\\\(resent-\\\\)?\\\\(to\\\\|cc\\\\|bcc\\\\): \\\\2\" | |
| 317 | |
| 318 `sregexq' is a macro. Each time it is used, it constructs a simple | |
| 319 Lisp expression that then invokes a moderately complex engine to | |
| 320 interpret the sregex and render the string form. Because of this, I | |
| 321 don't recommend sprinkling calls to `sregexq' throughout your code, | |
| 322 the way one normally does with string regexes (which are cheap to | |
| 323 evaluate). Instead, it's wiser to precompute the regexes you need | |
| 324 wherever possible instead of repeatedly constructing the same ones | |
| 325 over and over. Example: | |
| 326 | |
| 327 (let ((field-regex (sregexq (opt \"resent-\") | |
| 328 (or \"to\" \"cc\" \"bcc\")))) | |
| 329 ... | |
| 330 (while ... | |
| 331 ... | |
| 332 (re-search-forward field-regex ...) | |
| 333 ...)) | |
| 334 | |
| 335 The arguments to `sregexq' are automatically quoted, but the | |
| 336 flipside of this is that it is not straightforward to include | |
| 337 computed (i.e., non-constant) values in `sregexq' expressions. So | |
| 338 `sregex' is a function that is like `sregexq' but which does not | |
| 339 automatically quote its values. Literal sregex clauses must be | |
| 340 explicitly quoted like so: | |
| 341 | |
| 342 (sregex '(or \"Bob\" \"Robert\")) => \"Bob\\\\|Robert\" | |
| 343 | |
| 344 but computed clauses can be included easily, allowing for the reuse | |
| 345 of common clauses: | |
| 346 | |
| 347 (let ((dotstar '(0+ any)) | |
| 348 (whitespace '(1+ (syntax ?-))) | |
| 349 (digits '(1+ (char (?0 . ?9))))) | |
| 350 (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\" | |
| 351 | |
| 352 Here are the clauses allowed in an `sregex' or `sregexq' expression: | |
| 353 | |
| 354 - a string | |
| 355 This stands for the literal string. If it contains | |
| 356 metacharacters, they will be escaped in the resulting regex | |
| 357 (using `regexp-quote'). | |
| 358 | |
| 359 - the symbol `any' | |
| 360 This stands for \".\", a regex matching any character except | |
| 361 newline. | |
| 362 | |
| 363 - the symbol `bol' | |
| 364 Stands for \"^\", matching the empty string at the beginning of a line | |
| 365 | |
| 366 - the symbol `eol' | |
| 367 Stands for \"$\", matching the empty string at the end of a line | |
| 368 | |
| 369 - (group CLAUSE ...) | |
| 370 Groups the given CLAUSEs using \"\\\\(\" and \"\\\\)\". | |
| 371 | |
| 372 - (sequence CLAUSE ...) | |
| 373 | |
| 374 Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\". | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
375 Clauses grouped by `sequence' do not count for purposes of |
| 22537 | 376 numbering backreferences. Use `sequence' in situations like |
| 377 this: | |
| 378 | |
| 379 (sregexq (or \"dog\" \"cat\" | |
| 380 (sequence (opt \"sea \") \"monkey\"))) | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
381 => \"dog\\\\|cat\\\\|\\\\(?:sea \\\\)?monkey\" |
| 22537 | 382 |
| 383 where a single `or' alternate needs to contain multiple | |
| 384 subclauses. | |
| 385 | |
| 386 - (backref N) | |
| 387 Matches the same string previously matched by the Nth \"group\" in | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
388 the same sregex. N is a positive integer. |
| 22537 | 389 |
| 390 - (or CLAUSE ...) | |
| 391 Matches any one of the CLAUSEs by separating them with \"\\\\|\". | |
| 392 | |
| 393 - (0+ CLAUSE ...) | |
| 394 Concatenates the given CLAUSEs and matches zero or more | |
| 395 occurrences by appending \"*\". | |
| 396 | |
| 397 - (1+ CLAUSE ...) | |
| 398 Concatenates the given CLAUSEs and matches one or more | |
| 399 occurrences by appending \"+\". | |
| 400 | |
| 401 - (opt CLAUSE ...) | |
| 402 Concatenates the given CLAUSEs and matches zero or one occurrence | |
| 403 by appending \"?\". | |
| 404 | |
| 405 - (repeat MIN MAX CLAUSE ...) | |
| 406 Concatenates the given CLAUSEs and constructs a regex matching at | |
| 407 least MIN occurrences and at most MAX occurrences. MIN must be a | |
| 408 non-negative integer. MAX must be a non-negative integer greater | |
| 409 than or equal to MIN; or MAX can be nil to mean \"infinity.\" | |
| 410 | |
| 411 - (char CHAR-CLAUSE ...) | |
| 412 Creates a \"character class\" matching one character from the given | |
| 413 set. See below for how to construct a CHAR-CLAUSE. | |
| 414 | |
| 415 - (not-char CHAR-CLAUSE ...) | |
| 416 Creates a \"character class\" matching any one character not in the | |
| 417 given set. See below for how to construct a CHAR-CLAUSE. | |
| 418 | |
| 419 - the symbol `bot' | |
| 420 Stands for \"\\\\`\", matching the empty string at the beginning of | |
| 421 text (beginning of a string or of a buffer). | |
| 422 | |
| 423 - the symbol `eot' | |
| 424 Stands for \"\\\\'\", matching the empty string at the end of text. | |
| 425 | |
| 426 - the symbol `point' | |
| 427 Stands for \"\\\\=\", matching the empty string at point. | |
| 428 | |
| 429 - the symbol `word-boundary' | |
| 430 Stands for \"\\\\b\", matching the empty string at the beginning or | |
| 431 end of a word. | |
| 432 | |
| 433 - the symbol `not-word-boundary' | |
| 434 Stands for \"\\\\B\", matching the empty string not at the beginning | |
| 435 or end of a word. | |
| 436 | |
| 437 - the symbol `bow' | |
| 438 Stands for \"\\\\\\=<\", matching the empty string at the beginning of a | |
| 439 word. | |
| 440 | |
| 441 - the symbol `eow' | |
| 442 Stands for \"\\\\\\=>\", matching the empty string at the end of a word. | |
| 443 | |
| 444 - the symbol `wordchar' | |
| 445 Stands for the regex \"\\\\w\", matching a word-constituent character | |
| 446 (as determined by the current syntax table) | |
| 447 | |
| 448 - the symbol `not-wordchar' | |
| 449 Stands for the regex \"\\\\W\", matching a non-word-constituent | |
| 450 character. | |
| 451 | |
| 452 - (syntax CODE) | |
| 453 Stands for the regex \"\\\\sCODE\", where CODE is a syntax table code | |
| 454 (a single character). Matches any character with the requested | |
| 455 syntax. | |
| 456 | |
| 457 - (not-syntax CODE) | |
| 458 Stands for the regex \"\\\\SCODE\", where CODE is a syntax table code | |
| 459 (a single character). Matches any character without the | |
| 460 requested syntax. | |
| 461 | |
| 462 - (regex REGEX) | |
| 463 This is a \"trapdoor\" for including ordinary regular expression | |
| 464 strings in the result. Some regular expressions are clearer when | |
| 465 written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
466 instance. |
| 22537 | 467 |
| 468 Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...) | |
| 469 has one of the following forms: | |
| 470 | |
| 471 - a character | |
| 472 Adds that character to the set. | |
| 473 | |
| 474 - a string | |
| 475 Adds all the characters in the string to the set. | |
| 476 | |
| 477 - A pair (MIN . MAX) | |
| 478 Where MIN and MAX are characters, adds the range of characters | |
| 479 from MIN through MAX to the set." | |
| 480 `(apply 'sregex ',exps)) | |
| 481 | |
| 482 (defun sregex--engine (exp combine) | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
483 (cond |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
484 ((stringp exp) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
485 (if (and combine |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
486 (eq combine 'suffix) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
487 (/= (length exp) 1)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
488 (concat "\\(?:" (regexp-quote exp) "\\)") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
489 (regexp-quote exp))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
490 ((symbolp exp) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
491 (ecase exp |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
492 (any ".") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
493 (bol "^") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
494 (eol "$") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
495 (wordchar "\\w") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
496 (not-wordchar "\\W") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
497 (bot "\\`") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
498 (eot "\\'") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
499 (point "\\=") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
500 (word-boundary "\\b") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
501 (not-word-boundary "\\B") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
502 (bow "\\<") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
503 (eow "\\>"))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
504 ((consp exp) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
505 (funcall (intern (concat "sregex--" |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
506 (symbol-name (car exp)))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
507 (cdr exp) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
508 combine)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
509 (t (error "Invalid expression: %s" exp)))) |
| 22537 | 510 |
| 511 (defun sregex--sequence (exps combine) | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
512 (if (= (length exps) 1) (sregex--engine (car exps) combine) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
513 (let ((re (mapconcat |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
514 (lambda (e) (sregex--engine e 'concat)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
515 exps ""))) |
| 22537 | 516 (if (eq combine 'suffix) |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
517 (concat "\\(?:" re "\\)") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
518 re)))) |
| 22537 | 519 |
| 520 (defun sregex--or (exps combine) | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
521 (if (= (length exps) 1) (sregex--engine (car exps) combine) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
522 (let ((re (mapconcat |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
523 (lambda (e) (sregex--engine e 'or)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
524 exps "\\|"))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
525 (if (not (eq combine 'or)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
526 (concat "\\(?:" re "\\)") |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
527 re)))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
528 |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
529 (defun sregex--group (exps combine) (concat "\\(" (sregex--sequence exps nil) "\\)")) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
530 |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
531 (defun sregex--backref (exps combine) (concat "\\" (int-to-string (car exps)))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
532 (defun sregex--opt (exps combine) (concat (sregex--sequence exps 'suffix) "?")) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
533 (defun sregex--0+ (exps combine) (concat (sregex--sequence exps 'suffix) "*")) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
534 (defun sregex--1+ (exps combine) (concat (sregex--sequence exps 'suffix) "+")) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
535 |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
536 (defun sregex--char (exps combine) (sregex--char-aux nil exps)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
537 (defun sregex--not-char (exps combine) (sregex--char-aux t exps)) |
| 22537 | 538 |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
539 (defun sregex--syntax (exps combine) (format "\\s%c" (car exps))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
540 (defun sregex--not-syntax (exps combine) (format "\\S%c" (car exps))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
541 |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
542 (defun sregex--regex (exps combine) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
543 (if combine (concat "\\(?:" (car exps) "\\)") (car exps))) |
| 22537 | 544 |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
545 (defun sregex--repeat (exps combine) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
546 (let* ((min (or (pop exps) 0)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
547 (minstr (number-to-string min)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
548 (max (pop exps))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
549 (concat (sregex--sequence exps 'suffix) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
550 (concat "\\{" minstr "," |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
551 (when max (number-to-string max)) "\\}")))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
552 |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
553 (defun sregex--char-range (start end) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
554 (let ((startc (char-to-string start)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
555 (endc (char-to-string end))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
556 (cond |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
557 ((> end (+ start 2)) (concat startc "-" endc)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
558 ((> end (+ start 1)) (concat startc (char-to-string (1+ start)) endc)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
559 ((> end start) (concat startc endc)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
560 (t startc)))) |
| 22537 | 561 |
| 562 (defun sregex--char-aux (complement args) | |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
563 ;; regex-opt does the same, we should join effort. |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
564 (let ((chars (make-bool-vector 256 nil))) ; Yeah, right! |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
565 (dolist (arg args) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
566 (cond ((integerp arg) (aset chars arg t)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
567 ((stringp arg) (mapcar (lambda (c) (aset chars c t)) arg)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
568 ((consp arg) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
569 (let ((start (car arg)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
570 (end (cdr arg))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
571 (when (> start end) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
572 (let ((tmp start)) (setq start end) (setq end tmp))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
573 ;; now start <= end |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
574 (let ((i start)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
575 (while (<= i end) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
576 (aset chars i t) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
577 (setq i (1+ i)))))))) |
| 22537 | 578 ;; now chars is a map of the characters in the class |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
579 (let ((caret (aref chars ?^)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
580 (dash (aref chars ?-)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
581 (class (if (aref chars ?\]) "]" ""))) |
| 22537 | 582 (aset chars ?^ nil) |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
583 (aset chars ?- nil) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
584 (aset chars ?\] nil) |
| 22537 | 585 |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
586 (let (start end) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
587 (dotimes (i 256) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
588 (if (aref chars i) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
589 (progn |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
590 (unless start (setq start i)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
591 (setq end i) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
592 (aset chars i nil)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
593 (when start |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
594 (setq class (concat class (sregex--char-range start end))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
595 (setq start nil)))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
596 (if start |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
597 (setq class (concat class (sregex--char-range start end))))) |
| 22537 | 598 |
|
29069
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
599 (if (> (length class) 0) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
600 (setq class (concat class (if caret "^") (if dash "-"))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
601 (setq class (concat class (if dash "-") (if caret "^")))) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
602 (if (and (not complement) (= (length class) 1)) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
603 (regexp-quote class) |
|
cb028b1d6345
Rewritten to take advantage of shy-groups and
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
22974
diff
changeset
|
604 (concat "[" (if complement "^") class "]"))))) |
| 22537 | 605 |
| 606 (provide 'sregex) | |
| 607 | |
| 52401 | 608 ;;; arch-tag: 460c1f5a-eb6e-42ec-a451-ffac78bdf492 |
| 22537 | 609 ;;; sregex.el ends here |
