changeset 51148:f59aeee43725

(split-string): Implement specification that splitting on explicit separators retains null fields. Add new argument OMIT-NULLS. Special-case (split-string "a string").
author Juanma Barranquero <lekktu@gmail.com>
date Thu, 22 May 2003 20:59:57 +0000
parents c8319990e80a
children 337c29aec7ce
files lisp/subr.el
diffstat 1 files changed, 41 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/subr.el	Thu May 22 20:01:37 2003 +0000
+++ b/lisp/subr.el	Thu May 22 20:59:57 2003 +0000
@@ -1820,19 +1820,45 @@
 	(buffer-substring-no-properties (match-beginning num)
 					(match-end num)))))
 
-(defun split-string (string &optional separators)
-  "Splits STRING into substrings where there are matches for SEPARATORS.
-Each match for SEPARATORS is a splitting point.
-The substrings between the splitting points are made into a list
+(defconst split-string-default-separators "[ \f\t\n\r\v]+"
+  "The default value of separators for `split-string'.
+
+A regexp matching strings of whitespace.  May be locale-dependent
+\(as yet unimplemented).  Should not match non-breaking spaces.
+
+Warning: binding this to a different value and using it as default is
+likely to have undesired semantics.")
+
+;; The specification says that if both SEPARATORS and OMIT-NULLS are
+;; defaulted, OMIT-NULLS should be treated as t.  Simplifying the logical
+;; expression leads to the equivalent implementation that if SEPARATORS
+;; is defaulted, OMIT-NULLS is treated as t.
+(defun split-string (string &optional separators omit-nulls)
+  "Splits STRING into substrings bounded by matches for SEPARATORS.
+
+The beginning and end of STRING, and each match for SEPARATORS, are
+splitting points.  The substrings matching SEPARATORS are removed, and
+the substrings between the splitting points are collected as a list,
 which is returned.
-If SEPARATORS is absent, it defaults to \"[ \\f\\t\\n\\r\\v]+\".
+
+If SEPARATORS is non-nil, it should be a regular expression matching text
+which separates, but is not part of, the substrings.  If nil it defaults to
+`split-string-default-separators', normally \"[ \\f\\t\\n\\r\\v]+\", and
+OMIT-NULLS is forced to t.
 
-If there is match for SEPARATORS at the beginning of STRING, we do not
-include a null substring for that.  Likewise, if there is a match
-at the end of STRING, we don't include a null substring for that.
+If OMIT-NULLs is t, zero-length substrings are omitted from the list \(so
+that for the default value of SEPARATORS leading and trailing whitespace
+are effectively trimmed).  If nil, all zero-length substrings are retained,
+which correctly parses CSV format, for example.
+
+Note that the effect of `(split-string STRING)' is the same as
+`(split-string STRING split-string-default-separators t)').  In the rare
+case that you wish to retain zero-length substrings when splitting on
+whitespace, use `(split-string STRING split-string-default-separators)'.
 
 Modifies the match data; use `save-match-data' if necessary."
-  (let ((rexp (or separators "[ \f\t\n\r\v]+"))
+  (let ((keep-nulls (not (if separators omit-nulls t)))
+	(rexp (or separators split-string-default-separators))
 	(start 0)
 	notfirst
 	(list nil))
@@ -1841,16 +1867,14 @@
 				       (= start (match-beginning 0))
 				       (< start (length string)))
 				  (1+ start) start))
-		(< (match-beginning 0) (length string)))
+		(< start (length string)))
       (setq notfirst t)
-      (or (eq (match-beginning 0) 0)
-	  (and (eq (match-beginning 0) (match-end 0))
-	       (eq (match-beginning 0) start))
+      (if (or keep-nulls (< start (match-beginning 0)))
 	  (setq list
 		(cons (substring string start (match-beginning 0))
 		      list)))
       (setq start (match-end 0)))
-    (or (eq start (length string))
+    (if (or keep-nulls (< start (length string)))
 	(setq list
 	      (cons (substring string start)
 		    list)))
@@ -1868,7 +1892,7 @@
     newstr))
 
 (defun replace-regexp-in-string (regexp rep string &optional
-					fixedcase literal subexp start)
+                                 fixedcase literal subexp start)
   "Replace all matches for REGEXP with REP in STRING.
 
 Return a new string containing the replacements.
@@ -1917,7 +1941,7 @@
 				       rep
 				     (funcall rep (match-string 0 str)))
 				   fixedcase literal str subexp)
-		    (cons (substring string start mb) ; unmatched prefix
+		    (cons (substring string start mb)       ; unmatched prefix
 			  matches)))
 	(setq start me))
       ;; Reconstruct a string from the pieces.
@@ -2157,7 +2181,7 @@
 If TOGGLE has a `:menu-tag', that is used for the menu item's label."
   (unless (memq toggle minor-mode-list)
     (push toggle minor-mode-list))
-    
+
   (unless toggle-fun (setq toggle-fun toggle))
   ;; Add the name to the minor-mode-alist.
   (when name