comparison lisp/url/url-parse.el @ 73313:8c2a0bfc98b0

(url-generic-parse-url): Handle URLs with empty path component and non-empty query component. Untangle path, query and fragment parsing code. Add references to RFC 3986 in comments. (url-recreate-url-attributes): Start query string with "?", not ";".
author Magnus Henoch <mange@freemail.hu>
date Mon, 09 Oct 2006 20:10:13 +0000
parents f6c1ac4c14c7
children fd0232950658 2d56e13fd23d
comparison
equal deleted inserted replaced
73312:c56fd5a4e5af 73313:8c2a0bfc98b0
106 (concat "#" (url-target urlobj))))) 106 (concat "#" (url-target urlobj)))))
107 107
108 (defun url-recreate-url-attributes (urlobj) 108 (defun url-recreate-url-attributes (urlobj)
109 "Recreate the attributes of an URL string from the parsed URLOBJ." 109 "Recreate the attributes of an URL string from the parsed URLOBJ."
110 (when (url-attributes urlobj) 110 (when (url-attributes urlobj)
111 (concat ";" 111 (concat "?"
112 (mapconcat (lambda (x) 112 (mapconcat (lambda (x)
113 (if (cdr x) 113 (if (cdr x)
114 (concat (car x) "=" (cdr x)) 114 (concat (car x) "=" (cdr x))
115 (car x))) 115 (car x)))
116 (url-attributes urlobj) ";")))) 116 (url-attributes urlobj) ";"))))
118 ;;;###autoload 118 ;;;###autoload
119 (defun url-generic-parse-url (url) 119 (defun url-generic-parse-url (url)
120 "Return a vector of the parts of URL. 120 "Return a vector of the parts of URL.
121 Format is: 121 Format is:
122 \[TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL\]" 122 \[TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL\]"
123 ;; See RFC 3986.
123 (cond 124 (cond
124 ((null url) 125 ((null url)
125 (make-vector 9 nil)) 126 (make-vector 9 nil))
126 ((or (not (string-match url-nonrelative-link url)) 127 ((or (not (string-match url-nonrelative-link url))
127 (= ?/ (string-to-char url))) 128 (= ?/ (string-to-char url)))
129 ;; This isn't correct, as a relative URL can be a fragment link
130 ;; (e.g. "#foo") and many other things (see section 4.2).
131 ;; However, let's not fix something that isn't broken, especially
132 ;; when close to a release.
128 (let ((retval (make-vector 9 nil))) 133 (let ((retval (make-vector 9 nil)))
129 (url-set-filename retval url) 134 (url-set-filename retval url)
130 (url-set-full retval nil) 135 (url-set-full retval nil)
131 retval)) 136 retval))
132 (t 137 (t
146 (inhibit-read-only t)) 151 (inhibit-read-only t))
147 (erase-buffer) 152 (erase-buffer)
148 (insert url) 153 (insert url)
149 (goto-char (point-min)) 154 (goto-char (point-min))
150 (setq save-pos (point)) 155 (setq save-pos (point))
156
157 ;; 3.1. Scheme
151 (if (not (looking-at "//")) 158 (if (not (looking-at "//"))
152 (progn 159 (progn
153 (skip-chars-forward "a-zA-Z+.\\-") 160 (skip-chars-forward "a-zA-Z+.\\-")
154 (downcase-region save-pos (point)) 161 (downcase-region save-pos (point))
155 (setq prot (buffer-substring save-pos (point))) 162 (setq prot (buffer-substring save-pos (point)))
156 (skip-chars-forward ":") 163 (skip-chars-forward ":")
157 (setq save-pos (point)))) 164 (setq save-pos (point))))
158 165
159 ;; We are doing a fully specified URL, with hostname and all 166 ;; 3.2. Authority
160 (if (looking-at "//") 167 (if (looking-at "//")
161 (progn 168 (progn
162 (setq full t) 169 (setq full t)
163 (forward-char 2) 170 (forward-char 2)
164 (setq save-pos (point)) 171 (setq save-pos (point))
165 (skip-chars-forward "^/") 172 (skip-chars-forward "^/\\?#")
166 (setq host (buffer-substring save-pos (point))) 173 (setq host (buffer-substring save-pos (point)))
167 (if (string-match "^\\([^@]+\\)@" host) 174 (if (string-match "^\\([^@]+\\)@" host)
168 (setq user (match-string 1 host) 175 (setq user (match-string 1 host)
169 host (substring host (match-end 0) nil))) 176 host (substring host (match-end 0) nil)))
170 (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user)) 177 (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user))
171 (setq pass (match-string 2 user) 178 (setq pass (match-string 2 user)
172 user (match-string 1 user))) 179 user (match-string 1 user)))
180 ;; This gives wrong results for IPv6 literal addresses.
173 (if (string-match ":\\([0-9+]+\\)" host) 181 (if (string-match ":\\([0-9+]+\\)" host)
174 (setq port (string-to-number (match-string 1 host)) 182 (setq port (string-to-number (match-string 1 host))
175 host (substring host 0 (match-beginning 0)))) 183 host (substring host 0 (match-beginning 0))))
176 (if (string-match ":$" host) 184 (if (string-match ":$" host)
177 (setq host (substring host 0 (match-beginning 0)))) 185 (setq host (substring host 0 (match-beginning 0))))
179 save-pos (point)))) 187 save-pos (point))))
180 188
181 (if (not port) 189 (if (not port)
182 (setq port (url-scheme-get-property prot 'default-port))) 190 (setq port (url-scheme-get-property prot 'default-port)))
183 191
184 ;; Gross hack to preserve ';' in data URLs 192 ;; 3.3. Path
185
186 (setq save-pos (point)) 193 (setq save-pos (point))
187 194 (skip-chars-forward "^#?")
188 (if (string= "data" prot) 195 (setq file (buffer-substring save-pos (point)))
189 (goto-char (point-max)) 196
190 ;; Now check for references 197 ;; 3.4. Query
198 (when (looking-at "\\?")
199 (forward-char 1)
200 (setq save-pos (point))
191 (skip-chars-forward "^#") 201 (skip-chars-forward "^#")
192 (if (eobp) 202 ;; RFC 3986 specifies no general way of parsing the query
193 nil 203 ;; string, but `url-parse-args' seems universal enough.
194 (delete-region 204 (setq attr (url-parse-args (buffer-substring save-pos (point)) t)
195 (point) 205 attr (nreverse attr)))
196 (progn 206
197 (skip-chars-forward "#") 207 ;; 3.5. Fragment
198 (setq refs (buffer-substring (point) (point-max))) 208 (when (looking-at "#")
199 (point-max)))) 209 (forward-char 1)
200 (goto-char save-pos) 210 (setq refs (buffer-substring (point) (point-max))))
201 (skip-chars-forward "^;") 211
202 (if (not (eobp))
203 (setq attr (url-parse-args (buffer-substring (point) (point-max)) t)
204 attr (nreverse attr))))
205
206 (setq file (buffer-substring save-pos (point)))
207 (if (and host (string-match "%[0-9][0-9]" host)) 212 (if (and host (string-match "%[0-9][0-9]" host))
208 (setq host (url-unhex-string host))) 213 (setq host (url-unhex-string host)))
209 (vector prot user pass host port file refs attr full)))))) 214 (vector prot user pass host port file refs attr full))))))
210 215
211 (provide 'url-parse) 216 (provide 'url-parse)