Mercurial > emacs
annotate lisp/url/url-parse.el @ 74728:125d66299295
** jpff@cs.bath.ac.uk, Dec 7: Failure to submit second netnews message.
Closing in absence of followup from reporter.
author | Chong Yidong <cyd@stupidchicken.com> |
---|---|
date | Mon, 18 Dec 2006 16:29:59 +0000 |
parents | fd0232950658 |
children | e3694f1cb928 c71725faff1a |
rev | line source |
---|---|
54695 | 1 ;;; url-parse.el --- Uniform Resource Locator parser |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
2 |
64748
875dcc490074
Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents:
64084
diff
changeset
|
3 ;; Copyright (C) 1996, 1997, 1998, 1999, 2004, |
68640
e8a3fb527b77
Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents:
64748
diff
changeset
|
4 ;; 2005, 2006 Free Software Foundation, Inc. |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
5 |
54695 | 6 ;; Keywords: comm, data, processes |
7 | |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
8 ;; This file is part of GNU Emacs. |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
9 ;; |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
10 ;; GNU Emacs is free software; you can redistribute it and/or modify |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
11 ;; it under the terms of the GNU General Public License as published by |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
12 ;; the Free Software Foundation; either version 2, or (at your option) |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
13 ;; any later version. |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
14 ;; |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
15 ;; GNU Emacs is distributed in the hope that it will be useful, |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
18 ;; GNU General Public License for more details. |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
19 ;; |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
20 ;; You should have received a copy of the GNU General Public License |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the |
64084 | 22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
23 ;; Boston, MA 02110-1301, USA. | |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
24 |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
25 ;;; Commentary: |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
26 |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
27 ;;; Code: |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
28 |
54695 | 29 (require 'url-vars) |
30 | |
31 (autoload 'url-scheme-get-property "url-methods") | |
32 | |
;; Read accessors for the leading slots of a parsed URL vector, laid out as
;; [TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL]
;; (see `url-generic-parse-url').

(defmacro url-type (urlobj)
  "Return the TYPE slot (index 0) of the parsed URL vector URLOBJ."
  (list 'aref urlobj 0))

(defmacro url-user (urlobj)
  "Return the USER slot (index 1) of the parsed URL vector URLOBJ."
  (list 'aref urlobj 1))

(defmacro url-password (urlobj)
  "Return the PASSWORD slot (index 2) of the parsed URL vector URLOBJ."
  (list 'aref urlobj 2))

(defmacro url-host (urlobj)
  "Return the HOST slot (index 3) of the parsed URL vector URLOBJ."
  (list 'aref urlobj 3))
44 | |
(defmacro url-port (urlobj)
  "Return the port of the parsed URL vector URLOBJ.
This is the PORT slot (index 4) when it is non-nil.  Otherwise, if
the FULL slot (index 8) is non-nil, it is the `default-port'
property of the URL's scheme (the TYPE slot, index 0) as reported by
`url-scheme-get-property'; if FULL is nil too, the result is nil.

URLOBJ is evaluated exactly once."
  ;; Bind the argument to an uninterned symbol so that an argument form
  ;; with side effects (or a costly one) is not evaluated once per slot
  ;; read, and so that the binding cannot capture a caller's variable.
  (let ((obj (make-symbol "urlobj")))
    `(let ((,obj ,urlobj))
       (or (aref ,obj 4)
           (if (aref ,obj 8)
               (url-scheme-get-property (aref ,obj 0) 'default-port))))))
49 | |
;; Read accessors for the trailing slots of a parsed URL vector, laid out as
;; [TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL]
;; (see `url-generic-parse-url').

(defmacro url-filename (urlobj)
  "Return the FILE slot (index 5) of the parsed URL vector URLOBJ."
  (list 'aref urlobj 5))

(defmacro url-target (urlobj)
  "Return the TARGET slot (index 6) of the parsed URL vector URLOBJ."
  (list 'aref urlobj 6))

(defmacro url-attributes (urlobj)
  "Return the ATTRIBUTES slot (index 7) of the parsed URL vector URLOBJ."
  (list 'aref urlobj 7))

(defmacro url-fullness (urlobj)
  "Return the FULL slot (index 8) of the parsed URL vector URLOBJ."
  (list 'aref urlobj 8))
61 | |
;; Write accessors for a parsed URL vector, laid out as
;; [TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL]
;; (see `url-generic-parse-url').  Each expands to an `aset' call.

(defmacro url-set-type (urlobj type)
  "Store TYPE in the TYPE slot (index 0) of the parsed URL vector URLOBJ."
  (list 'aset urlobj 0 type))

(defmacro url-set-user (urlobj user)
  "Store USER in the USER slot (index 1) of the parsed URL vector URLOBJ."
  (list 'aset urlobj 1 user))

(defmacro url-set-password (urlobj pass)
  "Store PASS in the PASSWORD slot (index 2) of the parsed URL vector URLOBJ."
  (list 'aset urlobj 2 pass))

(defmacro url-set-host (urlobj host)
  "Store HOST in the HOST slot (index 3) of the parsed URL vector URLOBJ."
  (list 'aset urlobj 3 host))

(defmacro url-set-port (urlobj port)
  "Store PORT in the PORT slot (index 4) of the parsed URL vector URLOBJ."
  (list 'aset urlobj 4 port))

(defmacro url-set-filename (urlobj file)
  "Store FILE in the FILE slot (index 5) of the parsed URL vector URLOBJ."
  (list 'aset urlobj 5 file))

(defmacro url-set-target (urlobj targ)
  "Store TARG in the TARGET slot (index 6) of the parsed URL vector URLOBJ."
  (list 'aset urlobj 6 targ))

(defmacro url-set-attributes (urlobj targ)
  "Store TARG in the ATTRIBUTES slot (index 7) of the parsed URL vector URLOBJ."
  (list 'aset urlobj 7 targ))

(defmacro url-set-full (urlobj val)
  "Store VAL in the FULL slot (index 8) of the parsed URL vector URLOBJ."
  (list 'aset urlobj 8 val))
64748
875dcc490074
Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents:
64084
diff
changeset
|
88 |
54695 | 89 ;;;###autoload |
(defun url-recreate-url (urlobj)
  "Recreate a URL string from the parsed URLOBJ.
URLOBJ is a vector in the format returned by `url-generic-parse-url'.
The result is the concatenation of, in order:
  - the scheme followed by \":\";
  - \"//\" when a host is present;
  - \"USER@\" or \"USER:PASSWORD@\" when a user is present;
  - the host;
  - \":PORT\" when the port is non-nil and differs from the
    `default-port' property of the scheme;
  - the filename, or \"/\" when the filename is nil;
  - the attributes as rebuilt by `url-recreate-url-attributes';
  - \"#TARGET\" when a target is present."
  (let* ((type (url-type urlobj))
         (user (url-user urlobj))
         (pass (url-password urlobj))
         (host (url-host urlobj))
         ;; `url-port' may fall back to a scheme property lookup, so
         ;; evaluate it once here rather than at each use below.
         (port (url-port urlobj))
         (target (url-target urlobj)))
    (concat type ":"
            (if host "//" "")
            (if user
                (concat user
                        (if pass (concat ":" pass))
                        "@"))
            host
            ;; Only spell out the port when it is not the scheme default.
            (if (and port
                     (not (equal port
                                 (url-scheme-get-property type 'default-port))))
                (format ":%d" port))
            (or (url-filename urlobj) "/")
            (url-recreate-url-attributes urlobj)
            (if target
                (concat "#" target)))))
f6c1ac4c14c7
(url-recreate-url-attributes): New function, code moved from `url-recreate-url'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
68640
diff
changeset
|
107 |
f6c1ac4c14c7
(url-recreate-url-attributes): New function, code moved from `url-recreate-url'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
68640
diff
changeset
|
(defun url-recreate-url-attributes (urlobj)
  "Return the attribute part of a URL string rebuilt from the parsed URLOBJ.
The attributes of URLOBJ are a list of (NAME . VALUE) pairs.  Each
pair is rendered as \"NAME=VALUE\", or as just \"NAME\" when VALUE is
nil; the pieces are joined with \";\" and prefixed with \"?\".
Return nil when URLOBJ has no attributes."
  (let ((attrs (url-attributes urlobj)))
    (if attrs
        (concat "?"
                (mapconcat (lambda (pair)
                             (let ((name (car pair))
                                   (value (cdr pair)))
                               (if value
                                   (concat name "=" value)
                                 name)))
                           attrs
                           ";")))))
54695 | 117 |
118 ;;;###autoload | |
(defun url-generic-parse-url (url)
  "Return a vector of the parts of URL.
Format is:
\[TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL\]

If URL is nil, every slot is nil.  If URL does not match
`url-nonrelative-link', or starts with \"/\", it is treated as a
relative link: FILE is URL itself and every other slot is nil.

Otherwise URL is split along the lines of RFC 3986:
TYPE is the downcased scheme; USER, PASSWORD, HOST and PORT come
from the \"//\" authority part, with HOST downcased and unhexed when
it contains percent escapes; PORT falls back to the `default-port'
property of the scheme when URL gives none; FILE is the path;
ATTRIBUTES is the query string as parsed by `url-parse-args';
TARGET is the fragment; FULL is non-nil when URL has an authority
part."
  ;; See RFC 3986.
  (cond
   ((null url)
    (make-vector 9 nil))
   ((or (not (string-match url-nonrelative-link url))
        (= ?/ (string-to-char url)))
    ;; This isn't correct, as a relative URL can be a fragment link
    ;; (e.g. "#foo") and many other things (see section 4.2).
    ;; However, let's not fix something that isn't broken, especially
    ;; when close to a release.
    (let ((retval (make-vector 9 nil)))
      (url-set-filename retval url)
      (url-set-full retval nil)
      retval))
   (t
    ;; The temporary buffer starts out empty, so no explicit erase is
    ;; needed before inserting URL.
    (with-temp-buffer
      (set-syntax-table url-parse-syntax-table)
      (let ((save-pos nil)
            (prot nil)
            (user nil)
            (pass nil)
            (host nil)
            (port nil)
            (file nil)
            (refs nil)
            (attr nil)
            (full nil)
            (inhibit-read-only t))
        (insert url)
        (goto-char (point-min))
        (setq save-pos (point))

        ;; 3.1. Scheme
        (if (not (looking-at "//"))
            (progn
              ;; RFC 3986 allows digits in a scheme name (e.g. "h323"),
              ;; so they must not terminate the scan.
              (skip-chars-forward "a-zA-Z0-9+.\\-")
              (downcase-region save-pos (point))
              (setq prot (buffer-substring save-pos (point)))
              (skip-chars-forward ":")
              (setq save-pos (point))))

        ;; 3.2. Authority
        (if (looking-at "//")
            (progn
              (setq full t)
              (forward-char 2)
              (setq save-pos (point))
              (skip-chars-forward "^/\\?#")
              (setq host (buffer-substring save-pos (point)))
              ;; Split "USER[:PASSWORD]@" off the front of the authority.
              (if (string-match "^\\([^@]+\\)@" host)
                  (setq user (match-string 1 host)
                        host (substring host (match-end 0) nil)))
              (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user))
                  (setq pass (match-string 2 user)
                        user (match-string 1 user)))
              ;; The port is a run of digits at the very end of the
              ;; authority.  Anchoring there keeps the colons inside a
              ;; bracketed IPv6 literal (e.g. "[::1]:8080") from being
              ;; mistaken for the port separator.
              (if (string-match ":\\([0-9]+\\)\\'" host)
                  (setq port (string-to-number (match-string 1 host))
                        host (substring host 0 (match-beginning 0))))
              ;; Drop a trailing ":" left by an empty port.
              (if (string-match ":$" host)
                  (setq host (substring host 0 (match-beginning 0))))
              (setq host (downcase host)
                    save-pos (point))))

        (if (not port)
            (setq port (url-scheme-get-property prot 'default-port)))

        ;; 3.3. Path
        (setq save-pos (point))
        (skip-chars-forward "^#?")
        (setq file (buffer-substring save-pos (point)))

        ;; 3.4. Query
        (when (looking-at "\\?")
          (forward-char 1)
          (setq save-pos (point))
          (skip-chars-forward "^#")
          ;; RFC 3986 specifies no general way of parsing the query
          ;; string, but `url-parse-args' seems universal enough.
          (setq attr (url-parse-args (buffer-substring save-pos (point)) t)
                attr (nreverse attr)))

        ;; 3.5. Fragment
        (when (looking-at "#")
          (forward-char 1)
          (setq refs (buffer-substring (point) (point-max))))

        ;; Percent escapes use two hexadecimal digits, so letters must
        ;; be accepted here as well as decimal digits.
        (if (and host (string-match "%[0-9a-fA-F][0-9a-fA-F]" host))
            (setq host (url-unhex-string host)))
        (vector prot user pass host port file refs attr full))))))
214 | |
215 (provide 'url-parse) | |
54699 | 216 |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
217 ;; arch-tag: f338325f-71ab-4bee-93cc-78fb9a03d403 |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
218 ;;; url-parse.el ends here |