Mercurial > emacs
changeset 51148:f59aeee43725
(split-string): Implement specification that splitting on explicit separators
retains null fields. Add new argument OMIT-NULLS. Special-case (split-string
"a string").
author | Juanma Barranquero <lekktu@gmail.com> |
---|---|
date | Thu, 22 May 2003 20:59:57 +0000 |
parents | c8319990e80a |
children | 337c29aec7ce |
files | lisp/subr.el |
diffstat | 1 files changed, 41 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/subr.el Thu May 22 20:01:37 2003 +0000 +++ b/lisp/subr.el Thu May 22 20:59:57 2003 +0000 @@ -1820,19 +1820,45 @@ (buffer-substring-no-properties (match-beginning num) (match-end num))))) -(defun split-string (string &optional separators) - "Splits STRING into substrings where there are matches for SEPARATORS. -Each match for SEPARATORS is a splitting point. -The substrings between the splitting points are made into a list +(defconst split-string-default-separators "[ \f\t\n\r\v]+" + "The default value of separators for `split-string'. + +A regexp matching strings of whitespace. May be locale-dependent +\(as yet unimplemented). Should not match non-breaking spaces. + +Warning: binding this to a different value and using it as default is +likely to have undesired semantics.") + +;; The specification says that if both SEPARATORS and OMIT-NULLS are +;; defaulted, OMIT-NULLS should be treated as t. Simplifying the logical +;; expression leads to the equivalent implementation that if SEPARATORS +;; is defaulted, OMIT-NULLS is treated as t. +(defun split-string (string &optional separators omit-nulls) + "Splits STRING into substrings bounded by matches for SEPARATORS. + +The beginning and end of STRING, and each match for SEPARATORS, are +splitting points. The substrings matching SEPARATORS are removed, and +the substrings between the splitting points are collected as a list, which is returned. -If SEPARATORS is absent, it defaults to \"[ \\f\\t\\n\\r\\v]+\". + +If SEPARATORS is non-nil, it should be a regular expression matching text +which separates, but is not part of, the substrings. If nil it defaults to +`split-string-default-separators', normally \"[ \\f\\t\\n\\r\\v]+\", and +OMIT-NULLS is forced to t. -If there is match for SEPARATORS at the beginning of STRING, we do not -include a null substring for that. Likewise, if there is a match -at the end of STRING, we don't include a null substring for that. 
+If OMIT-NULLS is t, zero-length substrings are omitted from the list \(so +that for the default value of SEPARATORS leading and trailing whitespace +are effectively trimmed). If nil, all zero-length substrings are retained, +which correctly parses CSV format, for example. + +Note that the effect of `(split-string STRING)' is the same as +`(split-string STRING split-string-default-separators t)'. In the rare +case that you wish to retain zero-length substrings when splitting on +whitespace, use `(split-string STRING split-string-default-separators)'. Modifies the match data; use `save-match-data' if necessary." - (let ((rexp (or separators "[ \f\t\n\r\v]+")) + (let ((keep-nulls (not (if separators omit-nulls t))) + (rexp (or separators split-string-default-separators)) (start 0) notfirst (list nil)) @@ -1841,16 +1867,14 @@ (= start (match-beginning 0)) (< start (length string))) (1+ start) start)) - (< (match-beginning 0) (length string))) + (< start (length string))) (setq notfirst t) - (or (eq (match-beginning 0) 0) - (and (eq (match-beginning 0) (match-end 0)) - (eq (match-beginning 0) start)) + (if (or keep-nulls (< start (match-beginning 0))) (setq list (cons (substring string start (match-beginning 0)) list))) (setq start (match-end 0))) - (or (eq start (length string)) + (if (or keep-nulls (< start (length string))) (setq list (cons (substring string start) list))) @@ -1868,7 +1892,7 @@ newstr)) (defun replace-regexp-in-string (regexp rep string &optional - fixedcase literal subexp start) + fixedcase literal subexp start) "Replace all matches for REGEXP with REP in STRING. Return a new string containing the replacements. @@ -1917,7 +1941,7 @@ rep (funcall rep (match-string 0 str))) fixedcase literal str subexp) - (cons (substring string start mb) ; unmatched prefix + (cons (substring string start mb) ; unmatched prefix matches))) (setq start me)) ;; Reconstruct a string from the pieces.
@@ -2157,7 +2181,7 @@ If TOGGLE has a `:menu-tag', that is used for the menu item's label." (unless (memq toggle minor-mode-list) (push toggle minor-mode-list)) - + (unless toggle-fun (setq toggle-fun toggle)) ;; Add the name to the minor-mode-alist. (when name