Mercurial > emacs
changeset 73313:8c2a0bfc98b0
(url-generic-parse-url): Handle URLs with empty path component and
non-empty query component. Untangle path, query and fragment parsing
code. Add references to RFC 3986 in comments.
(url-recreate-url-attributes): Start query string with "?", not ";".
author | Magnus Henoch <mange@freemail.hu> |
---|---|
date | Mon, 09 Oct 2006 20:10:13 +0000 |
parents | c56fd5a4e5af |
children | 6c5a67740a8e |
files | lisp/url/ChangeLog lisp/url/url-parse.el |
diffstat | 2 files changed, 36 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/url/ChangeLog Mon Oct 09 19:58:14 2006 +0000
+++ b/lisp/url/ChangeLog Mon Oct 09 20:10:13 2006 +0000
@@ -1,3 +1,12 @@
+2006-10-09 Magnus Henoch <mange@freemail.hu>
+
+ * url-parse.el (url-generic-parse-url): Handle URLs with empty
+ path component and non-empty query component. Untangle path,
+ query and fragment parsing code. Add references to RFC 3986 in
+ comments.
+ (url-recreate-url-attributes): Start query string with "?", not
+ ";".
+
 2006-09-20 Stefan Monnier <monnier@iro.umontreal.ca>
 
 * url-dav.el (url-dav-file-attributes): Simplify.
--- a/lisp/url/url-parse.el Mon Oct 09 19:58:14 2006 +0000 +++ b/lisp/url/url-parse.el Mon Oct 09 20:10:13 2006 +0000 @@ -108,7 +108,7 @@ (defun url-recreate-url-attributes (urlobj) "Recreate the attributes of an URL string from the parsed URLOBJ." (when (url-attributes urlobj) - (concat ";" + (concat "?" (mapconcat (lambda (x) (if (cdr x) (concat (car x) "=" (cdr x)) @@ -120,11 +120,16 @@ "Return a vector of the parts of URL. Format is: \[TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL\]" + ;; See RFC 3986. (cond ((null url) (make-vector 9 nil)) ((or (not (string-match url-nonrelative-link url)) (= ?/ (string-to-char url))) + ;; This isn't correct, as a relative URL can be a fragment link + ;; (e.g. "#foo") and many other things (see section 4.2). + ;; However, let's not fix something that isn't broken, especially + ;; when close to a release. (let ((retval (make-vector 9 nil))) (url-set-filename retval url) (url-set-full retval nil) @@ -148,6 +153,8 @@ (insert url) (goto-char (point-min)) (setq save-pos (point)) + + ;; 3.1. Scheme (if (not (looking-at "//")) (progn (skip-chars-forward "a-zA-Z+.\\-") @@ -156,13 +163,13 @@ (skip-chars-forward ":") (setq save-pos (point)))) - ;; We are doing a fully specified URL, with hostname and all + ;; 3.2. Authority (if (looking-at "//") (progn (setq full t) (forward-char 2) (setq save-pos (point)) - (skip-chars-forward "^/") + (skip-chars-forward "^/\\?#") (setq host (buffer-substring save-pos (point))) (if (string-match "^\\([^@]+\\)@" host) (setq user (match-string 1 host) @@ -170,6 +177,7 @@ (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user)) (setq pass (match-string 2 user) user (match-string 1 user))) + ;; This gives wrong results for IPv6 literal addresses. 
(if (string-match ":\\([0-9+]+\\)" host) (setq port (string-to-number (match-string 1 host)) host (substring host 0 (match-beginning 0)))) @@ -181,29 +189,26 @@ (if (not port) (setq port (url-scheme-get-property prot 'default-port))) - ;; Gross hack to preserve ';' in data URLs - + ;; 3.3. Path (setq save-pos (point)) + (skip-chars-forward "^#?") + (setq file (buffer-substring save-pos (point))) - (if (string= "data" prot) - (goto-char (point-max)) - ;; Now check for references + ;; 3.4. Query + (when (looking-at "\\?") + (forward-char 1) + (setq save-pos (point)) (skip-chars-forward "^#") - (if (eobp) - nil - (delete-region - (point) - (progn - (skip-chars-forward "#") - (setq refs (buffer-substring (point) (point-max))) - (point-max)))) - (goto-char save-pos) - (skip-chars-forward "^;") - (if (not (eobp)) - (setq attr (url-parse-args (buffer-substring (point) (point-max)) t) - attr (nreverse attr)))) + ;; RFC 3986 specifies no general way of parsing the query + ;; string, but `url-parse-args' seems universal enough. + (setq attr (url-parse-args (buffer-substring save-pos (point)) t) + attr (nreverse attr))) - (setq file (buffer-substring save-pos (point))) + ;; 3.5. Fragment + (when (looking-at "#") + (forward-char 1) + (setq refs (buffer-substring (point) (point-max)))) + (if (and host (string-match "%[0-9][0-9]" host)) (setq host (url-unhex-string host))) (vector prot user pass host port file refs attr full))))))