Mercurial > emacs
comparison lisp/url/url-parse.el @ 73313:8c2a0bfc98b0
(url-generic-parse-url): Handle URLs with empty path component and
non-empty query component. Untangle path, query and fragment parsing
code. Add references to RFC 3986 in comments.
(url-recreate-url-attributes): Start query string with "?", not ";".
author | Magnus Henoch <mange@freemail.hu> |
---|---|
date | Mon, 09 Oct 2006 20:10:13 +0000 |
parents | f6c1ac4c14c7 |
children | fd0232950658 2d56e13fd23d |
comparison
equal
deleted
inserted
replaced
73312:c56fd5a4e5af | 73313:8c2a0bfc98b0 |
---|---|
106 (concat "#" (url-target urlobj))))) | 106 (concat "#" (url-target urlobj))))) |
107 | 107 |
108 (defun url-recreate-url-attributes (urlobj) | 108 (defun url-recreate-url-attributes (urlobj) |
109 "Recreate the attributes of an URL string from the parsed URLOBJ." | 109 "Recreate the attributes of an URL string from the parsed URLOBJ." |
110 (when (url-attributes urlobj) | 110 (when (url-attributes urlobj) |
111 (concat ";" | 111 (concat "?" |
112 (mapconcat (lambda (x) | 112 (mapconcat (lambda (x) |
113 (if (cdr x) | 113 (if (cdr x) |
114 (concat (car x) "=" (cdr x)) | 114 (concat (car x) "=" (cdr x)) |
115 (car x))) | 115 (car x))) |
116 (url-attributes urlobj) ";")))) | 116 (url-attributes urlobj) ";")))) |
118 ;;;###autoload | 118 ;;;###autoload |
119 (defun url-generic-parse-url (url) | 119 (defun url-generic-parse-url (url) |
120 "Return a vector of the parts of URL. | 120 "Return a vector of the parts of URL. |
121 Format is: | 121 Format is: |
122 \[TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL\]" | 122 \[TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL\]" |
123 ;; See RFC 3986. | |
123 (cond | 124 (cond |
124 ((null url) | 125 ((null url) |
125 (make-vector 9 nil)) | 126 (make-vector 9 nil)) |
126 ((or (not (string-match url-nonrelative-link url)) | 127 ((or (not (string-match url-nonrelative-link url)) |
127 (= ?/ (string-to-char url))) | 128 (= ?/ (string-to-char url))) |
129 ;; This isn't correct, as a relative URL can be a fragment link | |
130 ;; (e.g. "#foo") and many other things (see section 4.2). | |
131 ;; However, let's not fix something that isn't broken, especially | |
132 ;; when close to a release. | |
128 (let ((retval (make-vector 9 nil))) | 133 (let ((retval (make-vector 9 nil))) |
129 (url-set-filename retval url) | 134 (url-set-filename retval url) |
130 (url-set-full retval nil) | 135 (url-set-full retval nil) |
131 retval)) | 136 retval)) |
132 (t | 137 (t |
146 (inhibit-read-only t)) | 151 (inhibit-read-only t)) |
147 (erase-buffer) | 152 (erase-buffer) |
148 (insert url) | 153 (insert url) |
149 (goto-char (point-min)) | 154 (goto-char (point-min)) |
150 (setq save-pos (point)) | 155 (setq save-pos (point)) |
156 | |
157 ;; 3.1. Scheme | |
151 (if (not (looking-at "//")) | 158 (if (not (looking-at "//")) |
152 (progn | 159 (progn |
153 (skip-chars-forward "a-zA-Z+.\\-") | 160 (skip-chars-forward "a-zA-Z+.\\-") |
154 (downcase-region save-pos (point)) | 161 (downcase-region save-pos (point)) |
155 (setq prot (buffer-substring save-pos (point))) | 162 (setq prot (buffer-substring save-pos (point))) |
156 (skip-chars-forward ":") | 163 (skip-chars-forward ":") |
157 (setq save-pos (point)))) | 164 (setq save-pos (point)))) |
158 | 165 |
159 ;; We are doing a fully specified URL, with hostname and all | 166 ;; 3.2. Authority |
160 (if (looking-at "//") | 167 (if (looking-at "//") |
161 (progn | 168 (progn |
162 (setq full t) | 169 (setq full t) |
163 (forward-char 2) | 170 (forward-char 2) |
164 (setq save-pos (point)) | 171 (setq save-pos (point)) |
165 (skip-chars-forward "^/") | 172 (skip-chars-forward "^/\\?#") |
166 (setq host (buffer-substring save-pos (point))) | 173 (setq host (buffer-substring save-pos (point))) |
167 (if (string-match "^\\([^@]+\\)@" host) | 174 (if (string-match "^\\([^@]+\\)@" host) |
168 (setq user (match-string 1 host) | 175 (setq user (match-string 1 host) |
169 host (substring host (match-end 0) nil))) | 176 host (substring host (match-end 0) nil))) |
170 (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user)) | 177 (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user)) |
171 (setq pass (match-string 2 user) | 178 (setq pass (match-string 2 user) |
172 user (match-string 1 user))) | 179 user (match-string 1 user))) |
180 ;; This gives wrong results for IPv6 literal addresses. | |
173 (if (string-match ":\\([0-9+]+\\)" host) | 181 (if (string-match ":\\([0-9+]+\\)" host) |
174 (setq port (string-to-number (match-string 1 host)) | 182 (setq port (string-to-number (match-string 1 host)) |
175 host (substring host 0 (match-beginning 0)))) | 183 host (substring host 0 (match-beginning 0)))) |
176 (if (string-match ":$" host) | 184 (if (string-match ":$" host) |
177 (setq host (substring host 0 (match-beginning 0)))) | 185 (setq host (substring host 0 (match-beginning 0)))) |
179 save-pos (point)))) | 187 save-pos (point)))) |
180 | 188 |
181 (if (not port) | 189 (if (not port) |
182 (setq port (url-scheme-get-property prot 'default-port))) | 190 (setq port (url-scheme-get-property prot 'default-port))) |
183 | 191 |
184 ;; Gross hack to preserve ';' in data URLs | 192 ;; 3.3. Path |
185 | |
186 (setq save-pos (point)) | 193 (setq save-pos (point)) |
187 | 194 (skip-chars-forward "^#?") |
188 (if (string= "data" prot) | 195 (setq file (buffer-substring save-pos (point))) |
189 (goto-char (point-max)) | 196 |
190 ;; Now check for references | 197 ;; 3.4. Query |
198 (when (looking-at "\\?") | |
199 (forward-char 1) | |
200 (setq save-pos (point)) | |
191 (skip-chars-forward "^#") | 201 (skip-chars-forward "^#") |
192 (if (eobp) | 202 ;; RFC 3986 specifies no general way of parsing the query |
193 nil | 203 ;; string, but `url-parse-args' seems universal enough. |
194 (delete-region | 204 (setq attr (url-parse-args (buffer-substring save-pos (point)) t) |
195 (point) | 205 attr (nreverse attr))) |
196 (progn | 206 |
197 (skip-chars-forward "#") | 207 ;; 3.5. Fragment |
198 (setq refs (buffer-substring (point) (point-max))) | 208 (when (looking-at "#") |
199 (point-max)))) | 209 (forward-char 1) |
200 (goto-char save-pos) | 210 (setq refs (buffer-substring (point) (point-max)))) |
201 (skip-chars-forward "^;") | 211 |
202 (if (not (eobp)) | |
203 (setq attr (url-parse-args (buffer-substring (point) (point-max)) t) | |
204 attr (nreverse attr)))) | |
205 | |
206 (setq file (buffer-substring save-pos (point))) | |
207 (if (and host (string-match "%[0-9][0-9]" host)) | 212 (if (and host (string-match "%[0-9][0-9]" host)) |
208 (setq host (url-unhex-string host))) | 213 (setq host (url-unhex-string host))) |
209 (vector prot user pass host port file refs attr full)))))) | 214 (vector prot user pass host port file refs attr full)))))) |
210 | 215 |
211 (provide 'url-parse) | 216 (provide 'url-parse) |