Mercurial > emacs
annotate lisp/url/url-parse.el @ 99060:8d5ae00a1ff5
* basic.texi (Arguments): Explain how to insert multiple digits.
author | Chong Yidong <cyd@stupidchicken.com> |
---|---|
date | Thu, 23 Oct 2008 01:05:21 +0000 |
parents | 8259d0d8e107 |
children | a9dc0e7c3f2b |
rev | line source |
---|---|
54695 | 1 ;;; url-parse.el --- Uniform Resource Locator parser |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
2 |
64748
875dcc490074
Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents:
64084
diff
changeset
|
3 ;; Copyright (C) 1996, 1997, 1998, 1999, 2004, |
79720 | 4 ;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc. |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
5 |
54695 | 6 ;; Keywords: comm, data, processes |
7 | |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
8 ;; This file is part of GNU Emacs. |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
9 ;; |
94668
8259d0d8e107
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
88005
diff
changeset
|
10 ;; GNU Emacs is free software: you can redistribute it and/or modify |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
11 ;; it under the terms of the GNU General Public License as published by |
94668
8259d0d8e107
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
88005
diff
changeset
|
12 ;; the Free Software Foundation, either version 3 of the License, or |
8259d0d8e107
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
88005
diff
changeset
|
13 ;; (at your option) any later version. |
8259d0d8e107
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
88005
diff
changeset
|
14 |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
15 ;; GNU Emacs is distributed in the hope that it will be useful, |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
18 ;; GNU General Public License for more details. |
94668
8259d0d8e107
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
88005
diff
changeset
|
19 |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
20 ;; You should have received a copy of the GNU General Public License |
94668
8259d0d8e107
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
88005
diff
changeset
|
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
22 |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
23 ;;; Commentary: |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
24 |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
25 ;;; Code: |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
26 |
54695 | 27 (require 'url-vars) |
83823
dd2bcc6758a0
* url-parse.el (url): Use defstruct rather than macros. Update all callers.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
78222
diff
changeset
|
28 (eval-when-compile (require 'cl)) |
54695 | 29 |
30 (autoload 'url-scheme-get-property "url-methods") | |
31 | |
83823
dd2bcc6758a0
* url-parse.el (url): Use defstruct rather than macros. Update all callers.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
78222
diff
changeset
|
;; Parsed representation of a URL.  The default `make-url' constructor
;; and the `copy-url' copier are both suppressed; the only way to build
;; an object is the positional constructor `url-parse-make-urlobj',
;; every argument of which is optional.
(defstruct (url
            (:constructor nil)
            (:constructor url-parse-make-urlobj
                          (&optional type
                                     user
                                     password
                                     host
                                     portspec
                                     filename
                                     target
                                     attributes
                                     fullness))
            (:copier nil))
  type          ; first constructor argument; the parser stores the scheme here
  user          ; user name taken from the authority part
  password      ; password taken from the authority part
  host          ; host name
  portspec      ; port as stored; `url-port' falls back to the scheme default
  filename      ; everything the parser leaves after the authority
  target        ; text following "#"
  attributes    ; alist of (NAME . VALUE) pairs written after ";"
  fullness)     ; non-nil when the URL contained a "//" authority part
54695 | 39 |
83823
dd2bcc6758a0
* url-parse.el (url): Use defstruct rather than macros. Update all callers.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
78222
diff
changeset
|
(defsubst url-port (urlobj)
  "Return the port of URLOBJ.
This is the stored PORTSPEC when it is non-nil.  Otherwise, if
URLOBJ has a non-nil FULLNESS field, it is the `default-port'
property of the URL's scheme; in every other case it is nil."
  (let ((explicit (url-portspec urlobj)))
    (cond
     ;; An explicitly stored port always wins.
     (explicit)
     ;; Only URLs with FULLNESS set fall back to the scheme's default.
     ((url-fullness urlobj)
      (url-scheme-get-property (url-type urlobj) 'default-port)))))
54695 | 44 |
83823
dd2bcc6758a0
* url-parse.el (url): Use defstruct rather than macros. Update all callers.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
78222
diff
changeset
|
;; Make (setf (url-port URLOBJ) PORT) store PORT in the PORTSPEC slot.
(defsetf url-port (urlobj) (port)
  (list 'setf (list 'url-portspec urlobj) port))
64748
875dcc490074
Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents:
64084
diff
changeset
|
46 |
54695 | 47 ;;;###autoload |
(defun url-recreate-url (urlobj)
  "Recreate a URL string from the parsed URLOBJ.
The result is built from, in order: the type followed by \":\" (only
when URLOBJ has a type), \"//\" when URLOBJ has a host, the user and
optional password followed by \"@\", the host, the port when it
differs from the `default-port' of the scheme, the filename (\"/\"
when it is nil), the attributes, and the target preceded by \"#\"."
  (let ((type (url-type urlobj))
        (user (url-user urlobj))
        (pass (url-password urlobj))
        (host (url-host urlobj))
        (port (url-port urlobj))
        (frag (url-target urlobj)))
    (concat
     ;; A relative reference has no scheme; emitting a bare ":" for it
     ;; would turn "foo/bar" into ":foo/bar" (see RFC 3986, section 5.3).
     (if type (concat type ":"))
     (if host "//" "")
     (if user
         (concat user
                 (if pass (concat ":" pass))
                 "@"))
     host
     ;; Leave the port out when it is just the scheme's default.
     (if (and port
              (not (equal port
                          (url-scheme-get-property type 'default-port))))
         (format ":%d" port))
     (or (url-filename urlobj) "/")
     (url-recreate-url-attributes urlobj)
     (if frag (concat "#" frag)))))
f6c1ac4c14c7
(url-recreate-url-attributes): New function, code moved from `url-recreate-url'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
68640
diff
changeset
|
65 |
f6c1ac4c14c7
(url-recreate-url-attributes): New function, code moved from `url-recreate-url'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
68640
diff
changeset
|
(defun url-recreate-url-attributes (urlobj)
  "Recreate the attributes of an URL string from the parsed URLOBJ.
Return nil when URLOBJ has no attributes.  Otherwise return a string
that starts with \";\" and lists the attributes separated by \";\",
each written NAME=VALUE, or just NAME when its value is nil."
  (let ((attrs (url-attributes urlobj)))
    (if attrs
        (concat ";"
                (mapconcat (lambda (pair)
                             (let ((name (car pair))
                                   (value (cdr pair)))
                               (if value
                                   (concat name "=" value)
                                 name)))
                           attrs
                           ";")))))
54695 | 75 |
76 ;;;###autoload | |
(defun url-generic-parse-url (url)
  "Return an URL-struct of the parts of URL.
The CL-style struct contains the following fields:
TYPE USER PASSWORD HOST PORTSPEC FILENAME TARGET ATTRIBUTES FULLNESS.

If URL is nil, every field of the result is nil.  If URL does not
match `url-nonrelative-link', or starts with a slash, it is stored
unparsed in FILENAME and all other fields are nil.

Otherwise the authority part, introduced by \"//\", extends up to the
next slash, question mark or number sign.  A port is recognized only
at the very end of the authority, so an IPv6 literal such as
\"[::1]\" stays intact, brackets included, in HOST.  When URL gives
no port, PORTSPEC is set to the `default-port' of the scheme."
  ;; See RFC 3986.
  (cond
   ((null url)
    (url-parse-make-urlobj))
   ((or (not (string-match url-nonrelative-link url))
        (= ?/ (string-to-char url)))
    ;; This isn't correct, as a relative URL can be a fragment link
    ;; (e.g. "#foo") and many other things (see section 4.2).
    ;; However, let's not fix something that isn't broken, especially
    ;; when close to a release.
    (url-parse-make-urlobj nil nil nil nil nil url))
   (t
    ;; The temporary buffer starts out empty, so no `erase-buffer'.
    (with-temp-buffer
      (set-syntax-table url-parse-syntax-table)
      (let ((save-pos nil)
            (prot nil)
            (user nil)
            (pass nil)
            (host nil)
            (port nil)
            (file nil)
            (refs nil)
            (attr nil)
            (full nil)
            (inhibit-read-only t))
        (insert url)
        (goto-char (point-min))
        (setq save-pos (point))

        ;; 3.1. Scheme
        (unless (looking-at "//")
          (skip-chars-forward "a-zA-Z+.\\-")
          (downcase-region save-pos (point))
          (setq prot (buffer-substring save-pos (point)))
          (skip-chars-forward ":")
          (setq save-pos (point)))

        ;; 3.2. Authority
        (when (looking-at "//")
          (setq full t)
          (forward-char 2)
          (setq save-pos (point))
          ;; The authority ends at the next "/", "?" or "#", not only
          ;; at "/"; otherwise "http://host?q" or "http://host#frag"
          ;; would put the query or fragment into HOST.
          (skip-chars-forward "^/?#")
          (setq host (buffer-substring save-pos (point)))
          ;; 3.2.1. User information
          (if (string-match "^\\([^@]+\\)@" host)
              (setq user (match-string 1 host)
                    host (substring host (match-end 0) nil)))
          (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user))
              (setq pass (match-string 2 user)
                    user (match-string 1 user)))
          ;; 3.2.2. Host and 3.2.3. Port
          ;; PORT temporarily holds the digit string (possibly empty).
          (cond
           ;; IPv6 literal: the colons inside the brackets belong to
           ;; the address, only a ":PORT" after "]" is a port.
           ((string-match "\\`\\(\\[[^]]+\\]\\)\\(?::\\([0-9]*\\)\\)?\\'"
                          host)
            (setq port (match-string 2 host)
                  host (match-string 1 host)))
           ;; Registered name or IPv4 address: the port, if any, is the
           ;; final ":DIGITS"; a trailing lone ":" is simply dropped.
           ((string-match ":\\([0-9]*\\)\\'" host)
            (setq port (match-string 1 host)
                  host (substring host 0 (match-beginning 0)))))
          (setq port (and port
                          (not (string= port ""))
                          (string-to-number port)))
          (setq host (downcase host)
                save-pos (point)))

        (if (not port)
            (setq port (url-scheme-get-property prot 'default-port)))

        ;; 3.3. Path
        (setq save-pos (point))

        (if (string= "data" prot)
            ;; Gross hack to preserve ';' in data URLs: take the whole
            ;; rest of the URL as the file name.
            (goto-char (point-max))
          ;; 3.5. Fragment: cut "#..." off the buffer and keep the text
          ;; after the "#" as the target.
          (skip-chars-forward "^#")
          (unless (eobp)
            (delete-region
             (point)
             (progn
               (skip-chars-forward "#")
               (setq refs (buffer-substring (point) (point-max)))
               (point-max))))
          ;; Attributes: whatever follows the first ";".
          (goto-char save-pos)
          (skip-chars-forward "^;")
          (if (not (eobp))
              (setq attr (url-parse-args
                          (buffer-substring (point) (point-max)) t)
                    attr (nreverse attr))))

        (setq file (buffer-substring save-pos (point)))
        ;; Percent-escapes use hexadecimal digits, so A-F must be
        ;; accepted as well as 0-9.
        (if (and host (string-match "%[0-9a-fA-F][0-9a-fA-F]" host))
            (setq host (url-unhex-string host)))
        (url-parse-make-urlobj
         prot user pass host port file refs attr full))))))
54695 | 174 |
175 (provide 'url-parse) | |
54699 | 176 |
54831
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
177 ;; arch-tag: f338325f-71ab-4bee-93cc-78fb9a03d403 |
ca18766bb266
Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
54802
diff
changeset
|
178 ;;; url-parse.el ends here |