annotate lisp/url/url-parse.el @ 79877:ef3503538f91

(detect_coding_iso2022): New arg latin_extra_code_state. Allow Latin extra codes only when *latin_extra_code_state is nonzero. (detect_coding_mask): If there is a NULL byte, detect the encoding as UTF-16 or binary. If Latin extra codes exist, detect the encoding as ISO-2022 only when no other proper encoding is found.
author Kenichi Handa <handa@m17n.org>
date Sat, 19 Jan 2008 05:55:36 +0000
parents 9c0b3f269b92
children 107ccd98fa12
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
1 ;;; url-parse.el --- Uniform Resource Locator parser
54831
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
2
64748
875dcc490074 Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents: 64084
diff changeset
3 ;; Copyright (C) 1996, 1997, 1998, 1999, 2004,
79720
9c0b3f269b92 Add 2008 to copyright years.
Glenn Morris <rgm@gnu.org>
parents: 79007
diff changeset
4 ;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
54831
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
5
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
6 ;; Keywords: comm, data, processes
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
7
54831
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
8 ;; This file is part of GNU Emacs.
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
9 ;;
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
11 ;; it under the terms of the GNU General Public License as published by
78222
8932997d0b62 Switch license to GPLv3 or later.
Glenn Morris <rgm@gnu.org>
parents: 77220
diff changeset
12 ;; the Free Software Foundation; either version 3, or (at your option)
54831
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
13 ;; any later version.
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
14 ;;
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
15 ;; GNU Emacs is distributed in the hope that it will be useful,
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
18 ;; GNU General Public License for more details.
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
19 ;;
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
20 ;; You should have received a copy of the GNU General Public License
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
64084
a8fa7c632ee4 Update FSF's address.
Lute Kamstra <lute@gnu.org>
parents: 62400
diff changeset
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
a8fa7c632ee4 Update FSF's address.
Lute Kamstra <lute@gnu.org>
parents: 62400
diff changeset
23 ;; Boston, MA 02110-1301, USA.
54831
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
24
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
25 ;;; Commentary:
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
26
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
27 ;;; Code:
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
28
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
29 (require 'url-vars)
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
30
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
31 (autoload 'url-scheme-get-property "url-methods")
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
32
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-type (urlobj)
  "Return the TYPE slot (element 0) of the parsed URL vector URLOBJ."
  (aref urlobj 0))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
35
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-user (urlobj)
  "Return the USER slot (element 1) of the parsed URL vector URLOBJ."
  (aref urlobj 1))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
38
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-password (urlobj)
  "Return the PASSWORD slot (element 2) of the parsed URL vector URLOBJ."
  (aref urlobj 2))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
41
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-host (urlobj)
  "Return the HOST slot (element 3) of the parsed URL vector URLOBJ."
  (aref urlobj 3))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
44
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-port (urlobj)
  "Return the port of the parsed URL vector URLOBJ.
This is element 4 of URLOBJ when that is non-nil.  Otherwise, if
`url-fullness' is non-nil for URLOBJ, return the `default-port'
property of its scheme; if not, return nil."
  (let ((explicit-port (aref urlobj 4)))
    (cond
     ;; An explicitly stored port always wins.
     (explicit-port)
     ;; Only a full URL falls back to the scheme's default port.
     ((url-fullness urlobj)
      (url-scheme-get-property (url-type urlobj) 'default-port)))))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
49
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-filename (urlobj)
  "Return the FILE slot (element 5) of the parsed URL vector URLOBJ."
  (aref urlobj 5))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
52
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-target (urlobj)
  "Return the TARGET slot (element 6) of the parsed URL vector URLOBJ."
  (aref urlobj 6))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
55
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-attributes (urlobj)
  "Return the ATTRIBUTES slot (element 7) of the parsed URL vector URLOBJ."
  (aref urlobj 7))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
58
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-fullness (urlobj)
  "Return the FULL slot (element 8) of the parsed URL vector URLOBJ."
  (aref urlobj 8))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
61
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-set-type (urlobj type)
  "Store TYPE in the TYPE slot (element 0) of the URL vector URLOBJ.
URLOBJ is modified in place; the value returned is that of `aset'."
  (aset urlobj 0 type))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
64
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-set-user (urlobj user)
  "Store USER in the USER slot (element 1) of the URL vector URLOBJ.
URLOBJ is modified in place; the value returned is that of `aset'."
  (aset urlobj 1 user))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
67
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-set-password (urlobj pass)
  "Store PASS in the PASSWORD slot (element 2) of the URL vector URLOBJ.
URLOBJ is modified in place; the value returned is that of `aset'."
  (aset urlobj 2 pass))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
70
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-set-host (urlobj host)
  "Store HOST in the HOST slot (element 3) of the URL vector URLOBJ.
URLOBJ is modified in place; the value returned is that of `aset'."
  (aset urlobj 3 host))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
73
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-set-port (urlobj port)
  "Store PORT in the PORT slot (element 4) of the URL vector URLOBJ.
URLOBJ is modified in place; the value returned is that of `aset'."
  (aset urlobj 4 port))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
76
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-set-filename (urlobj file)
  "Store FILE in the FILE slot (element 5) of the URL vector URLOBJ.
URLOBJ is modified in place; the value returned is that of `aset'."
  (aset urlobj 5 file))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
79
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-set-target (urlobj targ)
  "Store TARG in the TARGET slot (element 6) of the URL vector URLOBJ.
URLOBJ is modified in place; the value returned is that of `aset'."
  (aset urlobj 6 targ))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
82
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-set-attributes (urlobj targ)
  "Store TARG in the ATTRIBUTES slot (element 7) of the URL vector URLOBJ.
URLOBJ is modified in place; the value returned is that of `aset'."
  (aset urlobj 7 targ))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
85
79007
218cb37c24ee (url-type, url-user, url-password, url-host)
Richard M. Stallman <rms@gnu.org>
parents: 78222
diff changeset
(defun url-set-full (urlobj val)
  "Store VAL in the FULL slot (element 8) of the URL vector URLOBJ.
URLOBJ is modified in place; the value returned is that of `aset'."
  (aset urlobj 8 val))
64748
875dcc490074 Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents: 64084
diff changeset
88
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
89 ;;;###autoload
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
(defun url-recreate-url (urlobj)
  "Recreate a URL string from the parsed URLOBJ.
URLOBJ is a vector in the format returned by `url-generic-parse-url'.

The scheme and its \":\" separator are emitted only when URLOBJ
has a type, so a relative URL (type nil) is recreated without a
spurious leading colon.  \"//\" is emitted only when a host is
present.  The port is emitted only when `url-port' returns
non-nil and it differs from the `default-port' property of the
scheme.  A nil filename is rendered as \"/\"."
  (let ((type (url-type urlobj))
        (user (url-user urlobj))
        (pass (url-password urlobj))
        (host (url-host urlobj))
        (port (url-port urlobj))
        (target (url-target urlobj)))
    (concat
     ;; RFC 3986, section 5.3: "scheme:" only if a scheme is defined.
     (if type (concat type ":"))
     (if host "//")
     (if user
         (concat user
                 (if pass (concat ":" pass))
                 "@"))
     host
     ;; Leave the port out when it is merely the scheme's default.
     (if (and port
              (not (equal port
                          (url-scheme-get-property type 'default-port))))
         (format ":%d" port))
     (or (url-filename urlobj) "/")
     (url-recreate-url-attributes urlobj)
     (if target (concat "#" target)))))
f6c1ac4c14c7 (url-recreate-url-attributes): New function, code moved from `url-recreate-url'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 68640
diff changeset
107
f6c1ac4c14c7 (url-recreate-url-attributes): New function, code moved from `url-recreate-url'.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 68640
diff changeset
(defun url-recreate-url-attributes (urlobj)
  "Return the attribute part of a URL string for the parsed URLOBJ.
The result is nil when URLOBJ has no attributes.  Otherwise each
attribute, a cons cell, is rendered as its car, followed by \"=\"
and its cdr when the cdr is non-nil; every attribute is preceded
by \";\"."
  (let ((attributes (url-attributes urlobj)))
    (if attributes
        (concat ";"
                (mapconcat (lambda (attribute)
                             (let ((name (car attribute))
                                   (value (cdr attribute)))
                               (if value
                                   (concat name "=" value)
                                 name)))
                           attributes
                           ";")))))
54695
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
117
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
118 ;;;###autoload
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
(defun url-generic-parse-url (url)
  "Return a vector of the parts of URL.
Format is:
\[TYPE USER PASSWORD HOST PORT FILE TARGET ATTRIBUTES FULL\]

If URL is nil, every element is nil.  If URL does not match
`url-nonrelative-link' or begins with \"/\", only FILE is set, to
URL itself.  Otherwise URL is split into components: TYPE is
downcased, HOST (when present) is downcased, PORT falls back to
the `default-port' property of the scheme, and FULL is t when the
scheme is followed by \"//\".  Any query string is left as part
of FILE."
  ;; See RFC 3986.
  (cond
   ((null url)
    (make-vector 9 nil))
   ((or (not (string-match url-nonrelative-link url))
        (= ?/ (string-to-char url)))
    ;; This isn't correct, as a relative URL can be a fragment link
    ;; (e.g. "#foo") and many other things (see section 4.2).
    ;; However, let's not fix something that isn't broken, especially
    ;; when close to a release.
    (let ((retval (make-vector 9 nil)))
      (url-set-filename retval url)
      (url-set-full retval nil)
      retval))
   (t
    ;; Parse in a scratch buffer so that point motion and region
    ;; deletion can be used to peel the components off one by one.
    (with-temp-buffer
      (set-syntax-table url-parse-syntax-table)
      (let ((save-pos nil)
            (prot nil)
            (user nil)
            (pass nil)
            (host nil)
            (port nil)
            (file nil)
            (refs nil)
            (attr nil)
            (full nil)
            (inhibit-read-only t))
        (insert url)
        (goto-char (point-min))
        (setq save-pos (point))

        ;; 3.1. Scheme
        (if (not (looking-at "//"))
            (progn
              ;; Digits are legal in a scheme name (RFC 3986, section
              ;; 3.1), e.g. "h323", so they must be skipped over too.
              (skip-chars-forward "a-zA-Z0-9+.\\-")
              (downcase-region save-pos (point))
              (setq prot (buffer-substring save-pos (point)))
              (skip-chars-forward ":")
              (setq save-pos (point))))

        ;; 3.2. Authority
        (if (looking-at "//")
            (progn
              (setq full t)
              (forward-char 2)
              (setq save-pos (point))
              (skip-chars-forward "^/")
              (setq host (buffer-substring save-pos (point)))
              ;; Split off "USER[:PASSWORD]@" if present.
              (if (string-match "^\\([^@]+\\)@" host)
                  (setq user (match-string 1 host)
                        host (substring host (match-end 0) nil)))
              (if (and user (string-match "\\([^:]+\\):\\(.*\\)" user))
                  (setq pass (match-string 2 user)
                        user (match-string 1 user)))
              ;; Only a trailing ":DIGITS" is a port.  Anchoring the
              ;; match at the end of the string keeps the colons inside
              ;; a bracketed IPv6 literal from being taken for a port.
              (if (string-match ":\\([0-9]+\\)\\'" host)
                  (setq port (string-to-number (match-string 1 host))
                        host (substring host 0 (match-beginning 0))))
              ;; A bare trailing ":" means "no port given"; drop it.
              (if (string-match ":$" host)
                  (setq host (substring host 0 (match-beginning 0))))
              (setq host (downcase host)
                    save-pos (point))))

        (if (not port)
            (setq port (url-scheme-get-property prot 'default-port)))

        ;; 3.3. Path
        (setq save-pos (point))

        (if (string= "data" prot)
            ;; Gross hack to preserve ';' in data URLs: take everything
            ;; that is left as FILE, without looking for a fragment or
            ;; for attributes.
            (goto-char (point-max))
          ;; 3.5. Fragment
          ;; Now check for references.  The fragment, including its
          ;; leading "#", is removed from the buffer so that it cannot
          ;; end up in FILE or in the attributes.
          (skip-chars-forward "^#")
          (if (eobp)
              nil
            (delete-region
             (point)
             (progn
               (skip-chars-forward "#")
               (setq refs (buffer-substring (point) (point-max)))
               (point-max))))
          ;; Attributes start at the first ";" after the authority.
          (goto-char save-pos)
          (skip-chars-forward "^;")
          (if (not (eobp))
              (setq attr (url-parse-args (buffer-substring (point) (point-max)) t)
                    attr (nreverse attr))))

        ;; FILE runs up to point: the first ";" or the end of the buffer.
        (setq file (buffer-substring save-pos (point)))
        ;; Percent-escapes use hexadecimal digits, not just 0-9.
        (if (and host (string-match "%[0-9a-fA-F][0-9a-fA-F]" host))
            (setq host (url-unhex-string host)))
        (vector prot user pass host port file refs attr full))))))
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
218
3fb37923e567 Initial revision
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
diff changeset
219 (provide 'url-parse)
54699
7784ae10206d Resolve CVS conflicts
Miles Bader <miles@gnu.org>
parents: 54698
diff changeset
220
54831
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
221 ;; arch-tag: f338325f-71ab-4bee-93cc-78fb9a03d403
ca18766bb266 Comment fixups.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 54802
diff changeset
222 ;;; url-parse.el ends here