86361
|
1 ;;; rng-uri.el --- URI parsing and manipulation
|
|
2
|
|
3 ;; Copyright (C) 2003 Free Software Foundation, Inc.
|
|
4
|
|
5 ;; Author: James Clark
|
|
6 ;; Keywords: XML
|
|
7
|
|
8 ;; This program is free software; you can redistribute it and/or
|
|
9 ;; modify it under the terms of the GNU General Public License as
|
|
10 ;; published by the Free Software Foundation; either version 2 of
|
|
11 ;; the License, or (at your option) any later version.
|
|
12
|
|
13 ;; This program is distributed in the hope that it will be
|
|
14 ;; useful, but WITHOUT ANY WARRANTY; without even the implied
|
|
15 ;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
16 ;; PURPOSE. See the GNU General Public License for more details.
|
|
17
|
|
18 ;; You should have received a copy of the GNU General Public
|
|
19 ;; License along with this program; if not, write to the Free
|
|
20 ;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
|
21 ;; MA 02111-1307 USA
|
|
22
|
|
23 ;;; Commentary:
|
|
24
|
|
25 ;;; Code:
|
|
26
|
|
27 (defun rng-file-name-uri (f)
|
|
28 "Return a URI for the filename F.
|
|
29 Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to
|
|
30 escape them using %HH."
|
|
31 (setq f (expand-file-name f))
|
|
32 (let ((url
|
|
33 (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]"
|
|
34 'rng-percent-encode
|
|
35 f)))
|
|
36 (concat "file:"
|
|
37 (if (and (> (length url) 0)
|
|
38 (= (aref url 0) ?/))
|
|
39 "//"
|
|
40 "///")
|
|
41 url)))
|
|
42
|
|
43 (defun rng-uri-escape-multibyte (uri)
|
|
44 "Escape multibyte characters in URI."
|
|
45 (replace-regexp-in-string "[:nonascii:]"
|
|
46 'rng-percent-encode
|
|
47 (encode-coding-string uri 'utf-8)))
|
|
48
|
|
49 (defun rng-percent-encode (str)
|
|
50 (apply 'concat
|
|
51 (mapcar (lambda (ch)
|
|
52 (format "%%%x%x" (/ ch 16) (% ch 16)))
|
|
53 (string-to-list str))))
|
|
54
|
|
55
|
|
56 (defun rng-uri-file-name (uri)
|
|
57 "Return the filename represented by a URI.
|
|
58 Signal an error if URI is not a valid file URL."
|
|
59 (rng-uri-file-name-1 uri nil))
|
|
60
|
|
61 (defun rng-uri-pattern-file-name-regexp (pattern)
|
|
62 "Return a regexp for filenames represented by URIs that match PATTERN."
|
|
63 (rng-uri-file-name-1 pattern 'match))
|
|
64
|
|
65 (defun rng-uri-pattern-file-name-replace-match (pattern)
|
|
66 (rng-uri-file-name-1 pattern 'replace))
|
|
67
|
|
68 ;; pattern is either nil or match or replace
|
|
69 (defun rng-uri-file-name-1 (uri pattern)
|
|
70 (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri)
|
|
71 (rng-uri-error "Bad escapes in URI `%s'" uri))
|
|
72 (setq uri (rng-uri-unescape-multibyte uri))
|
|
73 (let* ((components
|
|
74 (or (rng-uri-split uri)
|
|
75 (rng-uri-error "Cannot split URI `%s' into its components" uri)))
|
|
76 (scheme (nth 0 components))
|
|
77 (authority (nth 1 components))
|
|
78 (path (nth 2 components))
|
|
79 (absolutep (string-match "\\`/" path))
|
|
80 (query (nth 3 components))
|
|
81 (fragment-id (nth 4 components)))
|
|
82 (cond ((not scheme)
|
|
83 (unless pattern
|
|
84 (rng-uri-error "URI `%s' does not have a scheme" uri)))
|
|
85 ((not (string= (downcase scheme) "file"))
|
|
86 (rng-uri-error "URI `%s' does not use the `file:' scheme" uri)))
|
|
87 (when (not (member authority
|
|
88 (cons system-name '(nil "" "localhost"))))
|
|
89 (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'"
|
|
90 uri))
|
|
91 (when query
|
|
92 (rng-uri-error "`?' not escaped in file URI `%s'" uri))
|
|
93 (when fragment-id
|
|
94 (rng-uri-error "URI `%s' has a fragment identifier" uri))
|
|
95 (when (string-match ";" path)
|
|
96 (rng-uri-error "`;' not escaped in URI `%s'" uri))
|
|
97 (when (string-match "%2[fF]" path) ;; 2f is hex code of slash
|
|
98 (rng-uri-error "Escaped slash in URI `%s'" uri))
|
|
99 (when (and (eq system-type 'windows-nt)
|
|
100 absolutep
|
|
101 (file-name-absolute-p (substring path 1)))
|
|
102 (setq path (substring path 1)))
|
|
103 (when (and pattern (string-match "\\`\\./" path))
|
|
104 (setq path (substring path 2)))
|
|
105 (setq path
|
|
106 (cond ((eq pattern 'match)
|
|
107 (rng-uri-unescape-unibyte-match path))
|
|
108 ((eq pattern 'replace)
|
|
109 (rng-uri-unescape-unibyte-replace path 2))
|
|
110 (t
|
|
111 (rng-uri-unescape-unibyte path))))
|
|
112 (when (string-match "\000" path)
|
|
113 (rng-uri-error "URI `%s' has NUL character in path" uri))
|
|
114 (when (eq pattern 'match)
|
|
115 (setq path
|
|
116 (concat (if absolutep
|
|
117 "\\(\\)"
|
|
118 "\\(\\(?:[^/]*/\\)*\\)")
|
|
119 path)))
|
|
120 (cond ((eq pattern 'match)
|
|
121 (concat "\\`" path "\\'"))
|
|
122 ((and (eq pattern 'replace)
|
|
123 (not absolutep))
|
|
124 (concat "\\1" path))
|
|
125 (t path))))
|
|
126
|
|
127 (defun rng-uri-error (&rest args)
|
|
128 (signal 'rng-uri-error (list (apply 'format args))))
|
|
129
|
|
130 (put 'rng-uri-error 'error-conditions '(error rng-uri-error))
|
|
131 (put 'rng-uri-error 'error-message "Invalid URI")
|
|
132
|
|
133 (defun rng-uri-split (str)
|
|
134 (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\
|
|
135 \\(?://\\([^/?#]*\\)\\)?\
|
|
136 \\([^?#]*\\)\
|
|
137 \\(?:\\?\\([^#]*\\)\\)?\
|
|
138 \\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'"
|
|
139 str)
|
|
140 (list (match-string 1 str)
|
|
141 (match-string 2 str)
|
|
142 (match-string 3 str)
|
|
143 (match-string 4 str)
|
|
144 (match-string 5 str))))
|
|
145
|
|
146 (defun rng-uri-join (scheme authority path &optional query fragment-id)
|
|
147 (when path
|
|
148 (let (parts)
|
|
149 (when fragment-id
|
|
150 (setq parts (list "#" fragment-id)))
|
|
151 (when query
|
|
152 (setq parts
|
|
153 (cons "?"
|
|
154 (cons query parts))))
|
|
155 (setq parts (cons path parts))
|
|
156 (when authority
|
|
157 (setq parts
|
|
158 (cons "//"
|
|
159 (cons authority parts))))
|
|
160 (when scheme
|
|
161 (setq parts
|
|
162 (cons scheme
|
|
163 (cons ":" parts))))
|
|
164 (apply 'concat parts))))
|
|
165
|
|
166 (defun rng-uri-resolve (uri-ref base-uri)
|
|
167 "Resolve a possibly relative URI reference into absolute form.
|
|
168 URI-REF is the URI reference to be resolved.
|
|
169 BASE-URI is the base URI to use for resolving it.
|
|
170 The algorithm is specified by RFC 2396.
|
|
171 If there is some problem with URI-REF or BASE-URI, then
|
|
172 URI-REF will be returned."
|
|
173 (let* ((components (rng-uri-split uri-ref))
|
|
174 (scheme (nth 0 components))
|
|
175 (authority (nth 1 components))
|
|
176 (path (nth 2 components))
|
|
177 (query (nth 3 components))
|
|
178 (fragment-id (nth 4 components))
|
|
179 (base-components (rng-uri-split base-uri)))
|
|
180 (if (or (not components)
|
|
181 scheme
|
|
182 (not base-components)
|
|
183 (not (nth 0 base-components)))
|
|
184 uri-ref
|
|
185 (setq scheme (nth 0 base-components))
|
|
186 (when (not authority)
|
|
187 (setq authority (nth 1 base-components))
|
|
188 (if (and (equal path "") (not query))
|
|
189 ;; Handle same document reference by returning
|
|
190 ;; same URI (RFC 2396bis does this too).
|
|
191 (setq path (nth 2 base-components)
|
|
192 query (nth 3 base-components))
|
|
193 (setq path (rng-resolve-path path (nth 2 base-components)))))
|
|
194 (rng-uri-join scheme
|
|
195 authority
|
|
196 path
|
|
197 query
|
|
198 fragment-id))))
|
|
199
|
|
200 ;; See RFC 2396 5.2, steps 5 and 6
|
|
201 (defun rng-resolve-path (path base-path)
|
|
202 ;; Step 5
|
|
203 (if (or (string-match "\\`/" path)
|
|
204 (not (string-match "\\`/" base-path)))
|
|
205 path
|
|
206 ;; Step 6
|
|
207 ;; (a), (b)
|
|
208 (let ((segments (rng-split-path path))
|
|
209 (base-segments (rng-split-path base-path)))
|
|
210 (if (> (length base-segments) 1)
|
|
211 (setq segments (nconc (nbutlast base-segments)
|
|
212 segments))
|
|
213 (setcar segments
|
|
214 (concat (car base-segments) (car segments))))
|
|
215 ;; (d)
|
|
216 (let ((last-segment (last segments)))
|
|
217 (when (equal (car last-segment) ".")
|
|
218 (setcar last-segment "")))
|
|
219 ;; (c)
|
|
220 (setq segments (delete "." segments))
|
|
221 ;; (e)
|
|
222 (let (iter matched)
|
|
223 (while (progn
|
|
224 (setq matched nil)
|
|
225 (setq iter (cdr segments))
|
|
226 (while (and iter (not matched))
|
|
227 (if (or (not (equal (cadr iter) ".."))
|
|
228 (equal (car iter) ".."))
|
|
229 (setq iter (cdr iter))
|
|
230 (setcar iter nil)
|
|
231 (setcar (cdr iter)
|
|
232 ;; (f)
|
|
233 (if (cddr iter) nil ""))
|
|
234 (setq matched t)
|
|
235 (setq segments (delq nil segments))))
|
|
236 matched)))
|
|
237 (rng-join-path segments))))
|
|
238
|
|
239 (defun rng-relative-uri (full base)
|
|
240 "Return a URI that relative to BASE is equivalent to FULL.
|
|
241 The returned URI will be relative if possible.
|
|
242 Both FULL and BASE must be absolute URIs."
|
|
243 (let* ((components (rng-uri-split full))
|
|
244 (scheme (nth 0 components))
|
|
245 (authority (nth 1 components))
|
|
246 (path (nth 2 components))
|
|
247 (query (nth 3 components))
|
|
248 (fragment-id (nth 4 components))
|
|
249 (base-components (rng-uri-split base)))
|
|
250 (if (and components
|
|
251 base-components
|
|
252 scheme
|
|
253 (equal scheme
|
|
254 (nth 0 base-components)))
|
|
255 (progn
|
|
256 (setq scheme nil)
|
|
257 (when (and authority
|
|
258 (equal authority
|
|
259 (nth 1 base-components)))
|
|
260 (setq authority nil)
|
|
261 (setq path (rng-relative-path path (nth 2 base-components))))
|
|
262 (rng-uri-join scheme authority path query fragment-id))
|
|
263 full)))
|
|
264
|
|
265 (defun rng-relative-path (path base-path)
|
|
266 (let ((segments (rng-split-path path))
|
|
267 (base-segments (rng-split-path base-path)))
|
|
268 (when (> (length base-segments) 1)
|
|
269 (setq base-segments (nbutlast base-segments)))
|
|
270 (if (or (member "." segments)
|
|
271 (member ".." segments)
|
|
272 (member "." base-segments)
|
|
273 (member ".." base-segments))
|
|
274 path
|
|
275 (while (and segments
|
|
276 base-segments
|
|
277 (string= (car segments)
|
|
278 (car base-segments)))
|
|
279 (setq segments (cdr segments))
|
|
280 (setq base-segments (cdr base-segments)))
|
|
281 (while base-segments
|
|
282 (setq base-segments (cdr base-segments))
|
|
283 (setq segments (cons ".." segments)))
|
|
284 (when (equal (car segments) "")
|
|
285 (setq segments (cons "." segments)))
|
|
286 (rng-join-path segments))))
|
|
287
|
|
288 (defun rng-split-path (path)
|
|
289 (let ((start 0)
|
|
290 segments)
|
|
291 (while (string-match "/" path start)
|
|
292 (setq segments (cons (substring path start (match-beginning 0))
|
|
293 segments))
|
|
294 (setq start (match-end 0)))
|
|
295 (nreverse (cons (substring path start) segments))))
|
|
296
|
|
297 (defun rng-join-path (segments)
|
|
298 (and segments
|
|
299 (mapconcat 'identity segments "/")))
|
|
300
|
|
301 (defun rng-uri-unescape-multibyte (str)
|
|
302 (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+"
|
|
303 'rng-multibyte-percent-decode
|
|
304 str))
|
|
305
|
|
306 (defun rng-multibyte-percent-decode (str)
|
|
307 (decode-coding-string (apply 'string
|
|
308 (mapcar (lambda (h) (string-to-number h 16))
|
|
309 (split-string str "%")))
|
|
310 'utf-8))
|
|
311
|
|
312 (defun rng-uri-unescape-unibyte (str)
|
|
313 (replace-regexp-in-string "%[0-7][0-9a-fA-F]"
|
|
314 (lambda (h)
|
|
315 (string-to-number (substring h 1) 16))
|
|
316 str
|
|
317 t
|
|
318 t))
|
|
319
|
|
320 (defun rng-uri-unescape-unibyte-match (str)
|
|
321 (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]"
|
|
322 (lambda (match)
|
|
323 (if (string= match "*")
|
|
324 "\\([^/]*\\)"
|
|
325 (regexp-quote
|
|
326 (if (= (length match) 1)
|
|
327 match
|
|
328 (string-to-number (substring match 1)
|
|
329 16)))))
|
|
330 str
|
|
331 t
|
|
332 t))
|
|
333
|
|
334 (defun rng-uri-unescape-unibyte-replace (str next-match-index)
|
|
335 (replace-regexp-in-string
|
|
336 "%[0-7][0-9a-fA-F]\\|[^%]"
|
|
337 (lambda (match)
|
|
338 (if (string= match "*")
|
|
339 (let ((n next-match-index))
|
|
340 (setq next-match-index (1+ n))
|
|
341 (format "\\%s" n))
|
|
342 (let ((ch (if (= (length match) 1)
|
|
343 (aref match 0)
|
|
344 (string-to-number (substring match 1)
|
|
345 16))))
|
|
346 (if (eq ch ?\\)
|
|
347 (string ?\\ ?\\)
|
|
348 (string ch)))))
|
|
349 str
|
|
350 t
|
|
351 t))
|
|
352
|
|
353 (provide 'rng-uri)
|
|
354
|
86379
|
355 ;; arch-tag: c7b7b8b8-61d1-48ec-82bc-7001c70b2e9d
|
86361
|
356 ;;; rng-uri.el ends here
|