86361
|
1 ;;; rng-uri.el --- URI parsing and manipulation
|
|
2
|
86553
|
3 ;; Copyright (C) 2003, 2007 Free Software Foundation, Inc.
|
86361
|
4
|
|
5 ;; Author: James Clark
|
|
6 ;; Keywords: XML
|
|
7
|
86553
|
8 ;; This file is part of GNU Emacs.
|
|
9
|
|
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
11 ;; it under the terms of the GNU General Public License as published by
|
|
12 ;; the Free Software Foundation; either version 3, or (at your option)
|
|
13 ;; any later version.
|
86361
|
14
|
86553
|
15 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
18 ;; GNU General Public License for more details.
|
86361
|
19
|
86553
|
20 ;; You should have received a copy of the GNU General Public License
|
|
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
|
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
23 ;; Boston, MA 02110-1301, USA.
|
86361
|
24
|
|
25 ;;; Commentary:
|
|
26
|
|
27 ;;; Code:
|
|
28
|
|
29 (defun rng-file-name-uri (f)
|
|
30 "Return a URI for the filename F.
|
|
31 Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to
|
|
32 escape them using %HH."
|
|
33 (setq f (expand-file-name f))
|
|
34 (let ((url
|
|
35 (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]"
|
|
36 'rng-percent-encode
|
|
37 f)))
|
|
38 (concat "file:"
|
|
39 (if (and (> (length url) 0)
|
|
40 (= (aref url 0) ?/))
|
|
41 "//"
|
|
42 "///")
|
|
43 url)))
|
|
44
|
|
45 (defun rng-uri-escape-multibyte (uri)
|
|
46 "Escape multibyte characters in URI."
|
|
47 (replace-regexp-in-string "[:nonascii:]"
|
|
48 'rng-percent-encode
|
|
49 (encode-coding-string uri 'utf-8)))
|
|
50
|
|
51 (defun rng-percent-encode (str)
|
|
52 (apply 'concat
|
|
53 (mapcar (lambda (ch)
|
|
54 (format "%%%x%x" (/ ch 16) (% ch 16)))
|
|
55 (string-to-list str))))
|
|
56
|
|
57
|
|
58 (defun rng-uri-file-name (uri)
|
|
59 "Return the filename represented by a URI.
|
|
60 Signal an error if URI is not a valid file URL."
|
|
61 (rng-uri-file-name-1 uri nil))
|
|
62
|
|
63 (defun rng-uri-pattern-file-name-regexp (pattern)
|
|
64 "Return a regexp for filenames represented by URIs that match PATTERN."
|
|
65 (rng-uri-file-name-1 pattern 'match))
|
|
66
|
|
67 (defun rng-uri-pattern-file-name-replace-match (pattern)
|
|
68 (rng-uri-file-name-1 pattern 'replace))
|
|
69
|
|
70 ;; pattern is either nil or match or replace
|
|
71 (defun rng-uri-file-name-1 (uri pattern)
|
|
72 (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri)
|
|
73 (rng-uri-error "Bad escapes in URI `%s'" uri))
|
|
74 (setq uri (rng-uri-unescape-multibyte uri))
|
|
75 (let* ((components
|
|
76 (or (rng-uri-split uri)
|
|
77 (rng-uri-error "Cannot split URI `%s' into its components" uri)))
|
|
78 (scheme (nth 0 components))
|
|
79 (authority (nth 1 components))
|
|
80 (path (nth 2 components))
|
|
81 (absolutep (string-match "\\`/" path))
|
|
82 (query (nth 3 components))
|
|
83 (fragment-id (nth 4 components)))
|
|
84 (cond ((not scheme)
|
|
85 (unless pattern
|
|
86 (rng-uri-error "URI `%s' does not have a scheme" uri)))
|
|
87 ((not (string= (downcase scheme) "file"))
|
|
88 (rng-uri-error "URI `%s' does not use the `file:' scheme" uri)))
|
|
89 (when (not (member authority
|
|
90 (cons system-name '(nil "" "localhost"))))
|
|
91 (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'"
|
|
92 uri))
|
|
93 (when query
|
|
94 (rng-uri-error "`?' not escaped in file URI `%s'" uri))
|
|
95 (when fragment-id
|
|
96 (rng-uri-error "URI `%s' has a fragment identifier" uri))
|
|
97 (when (string-match ";" path)
|
|
98 (rng-uri-error "`;' not escaped in URI `%s'" uri))
|
|
99 (when (string-match "%2[fF]" path) ;; 2f is hex code of slash
|
|
100 (rng-uri-error "Escaped slash in URI `%s'" uri))
|
|
101 (when (and (eq system-type 'windows-nt)
|
|
102 absolutep
|
|
103 (file-name-absolute-p (substring path 1)))
|
|
104 (setq path (substring path 1)))
|
|
105 (when (and pattern (string-match "\\`\\./" path))
|
|
106 (setq path (substring path 2)))
|
|
107 (setq path
|
|
108 (cond ((eq pattern 'match)
|
|
109 (rng-uri-unescape-unibyte-match path))
|
|
110 ((eq pattern 'replace)
|
|
111 (rng-uri-unescape-unibyte-replace path 2))
|
|
112 (t
|
|
113 (rng-uri-unescape-unibyte path))))
|
|
114 (when (string-match "\000" path)
|
|
115 (rng-uri-error "URI `%s' has NUL character in path" uri))
|
|
116 (when (eq pattern 'match)
|
|
117 (setq path
|
|
118 (concat (if absolutep
|
|
119 "\\(\\)"
|
|
120 "\\(\\(?:[^/]*/\\)*\\)")
|
|
121 path)))
|
|
122 (cond ((eq pattern 'match)
|
|
123 (concat "\\`" path "\\'"))
|
|
124 ((and (eq pattern 'replace)
|
|
125 (not absolutep))
|
|
126 (concat "\\1" path))
|
|
127 (t path))))
|
|
128
|
|
129 (defun rng-uri-error (&rest args)
|
|
130 (signal 'rng-uri-error (list (apply 'format args))))
|
|
131
|
|
132 (put 'rng-uri-error 'error-conditions '(error rng-uri-error))
|
|
133 (put 'rng-uri-error 'error-message "Invalid URI")
|
|
134
|
|
135 (defun rng-uri-split (str)
|
|
136 (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\
|
|
137 \\(?://\\([^/?#]*\\)\\)?\
|
|
138 \\([^?#]*\\)\
|
|
139 \\(?:\\?\\([^#]*\\)\\)?\
|
|
140 \\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'"
|
|
141 str)
|
|
142 (list (match-string 1 str)
|
|
143 (match-string 2 str)
|
|
144 (match-string 3 str)
|
|
145 (match-string 4 str)
|
|
146 (match-string 5 str))))
|
|
147
|
|
148 (defun rng-uri-join (scheme authority path &optional query fragment-id)
|
|
149 (when path
|
|
150 (let (parts)
|
|
151 (when fragment-id
|
|
152 (setq parts (list "#" fragment-id)))
|
|
153 (when query
|
|
154 (setq parts
|
|
155 (cons "?"
|
|
156 (cons query parts))))
|
|
157 (setq parts (cons path parts))
|
|
158 (when authority
|
|
159 (setq parts
|
|
160 (cons "//"
|
|
161 (cons authority parts))))
|
|
162 (when scheme
|
|
163 (setq parts
|
|
164 (cons scheme
|
|
165 (cons ":" parts))))
|
|
166 (apply 'concat parts))))
|
|
167
|
|
168 (defun rng-uri-resolve (uri-ref base-uri)
|
|
169 "Resolve a possibly relative URI reference into absolute form.
|
|
170 URI-REF is the URI reference to be resolved.
|
|
171 BASE-URI is the base URI to use for resolving it.
|
|
172 The algorithm is specified by RFC 2396.
|
|
173 If there is some problem with URI-REF or BASE-URI, then
|
|
174 URI-REF will be returned."
|
|
175 (let* ((components (rng-uri-split uri-ref))
|
|
176 (scheme (nth 0 components))
|
|
177 (authority (nth 1 components))
|
|
178 (path (nth 2 components))
|
|
179 (query (nth 3 components))
|
|
180 (fragment-id (nth 4 components))
|
|
181 (base-components (rng-uri-split base-uri)))
|
|
182 (if (or (not components)
|
|
183 scheme
|
|
184 (not base-components)
|
|
185 (not (nth 0 base-components)))
|
|
186 uri-ref
|
|
187 (setq scheme (nth 0 base-components))
|
|
188 (when (not authority)
|
|
189 (setq authority (nth 1 base-components))
|
|
190 (if (and (equal path "") (not query))
|
|
191 ;; Handle same document reference by returning
|
|
192 ;; same URI (RFC 2396bis does this too).
|
|
193 (setq path (nth 2 base-components)
|
|
194 query (nth 3 base-components))
|
|
195 (setq path (rng-resolve-path path (nth 2 base-components)))))
|
|
196 (rng-uri-join scheme
|
|
197 authority
|
|
198 path
|
|
199 query
|
|
200 fragment-id))))
|
|
201
|
|
202 ;; See RFC 2396 5.2, steps 5 and 6
|
|
203 (defun rng-resolve-path (path base-path)
|
|
204 ;; Step 5
|
|
205 (if (or (string-match "\\`/" path)
|
|
206 (not (string-match "\\`/" base-path)))
|
|
207 path
|
|
208 ;; Step 6
|
|
209 ;; (a), (b)
|
|
210 (let ((segments (rng-split-path path))
|
|
211 (base-segments (rng-split-path base-path)))
|
|
212 (if (> (length base-segments) 1)
|
|
213 (setq segments (nconc (nbutlast base-segments)
|
|
214 segments))
|
|
215 (setcar segments
|
|
216 (concat (car base-segments) (car segments))))
|
|
217 ;; (d)
|
|
218 (let ((last-segment (last segments)))
|
|
219 (when (equal (car last-segment) ".")
|
|
220 (setcar last-segment "")))
|
|
221 ;; (c)
|
|
222 (setq segments (delete "." segments))
|
|
223 ;; (e)
|
|
224 (let (iter matched)
|
|
225 (while (progn
|
|
226 (setq matched nil)
|
|
227 (setq iter (cdr segments))
|
|
228 (while (and iter (not matched))
|
|
229 (if (or (not (equal (cadr iter) ".."))
|
|
230 (equal (car iter) ".."))
|
|
231 (setq iter (cdr iter))
|
|
232 (setcar iter nil)
|
|
233 (setcar (cdr iter)
|
|
234 ;; (f)
|
|
235 (if (cddr iter) nil ""))
|
|
236 (setq matched t)
|
|
237 (setq segments (delq nil segments))))
|
|
238 matched)))
|
|
239 (rng-join-path segments))))
|
|
240
|
|
241 (defun rng-relative-uri (full base)
|
|
242 "Return a URI that relative to BASE is equivalent to FULL.
|
|
243 The returned URI will be relative if possible.
|
|
244 Both FULL and BASE must be absolute URIs."
|
|
245 (let* ((components (rng-uri-split full))
|
|
246 (scheme (nth 0 components))
|
|
247 (authority (nth 1 components))
|
|
248 (path (nth 2 components))
|
|
249 (query (nth 3 components))
|
|
250 (fragment-id (nth 4 components))
|
|
251 (base-components (rng-uri-split base)))
|
|
252 (if (and components
|
|
253 base-components
|
|
254 scheme
|
|
255 (equal scheme
|
|
256 (nth 0 base-components)))
|
|
257 (progn
|
|
258 (setq scheme nil)
|
|
259 (when (and authority
|
|
260 (equal authority
|
|
261 (nth 1 base-components)))
|
|
262 (setq authority nil)
|
|
263 (setq path (rng-relative-path path (nth 2 base-components))))
|
|
264 (rng-uri-join scheme authority path query fragment-id))
|
|
265 full)))
|
|
266
|
|
267 (defun rng-relative-path (path base-path)
|
|
268 (let ((segments (rng-split-path path))
|
|
269 (base-segments (rng-split-path base-path)))
|
|
270 (when (> (length base-segments) 1)
|
|
271 (setq base-segments (nbutlast base-segments)))
|
|
272 (if (or (member "." segments)
|
|
273 (member ".." segments)
|
|
274 (member "." base-segments)
|
|
275 (member ".." base-segments))
|
|
276 path
|
|
277 (while (and segments
|
|
278 base-segments
|
|
279 (string= (car segments)
|
|
280 (car base-segments)))
|
|
281 (setq segments (cdr segments))
|
|
282 (setq base-segments (cdr base-segments)))
|
|
283 (while base-segments
|
|
284 (setq base-segments (cdr base-segments))
|
|
285 (setq segments (cons ".." segments)))
|
|
286 (when (equal (car segments) "")
|
|
287 (setq segments (cons "." segments)))
|
|
288 (rng-join-path segments))))
|
|
289
|
|
290 (defun rng-split-path (path)
|
|
291 (let ((start 0)
|
|
292 segments)
|
|
293 (while (string-match "/" path start)
|
|
294 (setq segments (cons (substring path start (match-beginning 0))
|
|
295 segments))
|
|
296 (setq start (match-end 0)))
|
|
297 (nreverse (cons (substring path start) segments))))
|
|
298
|
|
299 (defun rng-join-path (segments)
|
|
300 (and segments
|
|
301 (mapconcat 'identity segments "/")))
|
|
302
|
|
303 (defun rng-uri-unescape-multibyte (str)
|
|
304 (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+"
|
|
305 'rng-multibyte-percent-decode
|
|
306 str))
|
|
307
|
|
308 (defun rng-multibyte-percent-decode (str)
|
|
309 (decode-coding-string (apply 'string
|
|
310 (mapcar (lambda (h) (string-to-number h 16))
|
|
311 (split-string str "%")))
|
|
312 'utf-8))
|
|
313
|
|
314 (defun rng-uri-unescape-unibyte (str)
|
|
315 (replace-regexp-in-string "%[0-7][0-9a-fA-F]"
|
|
316 (lambda (h)
|
|
317 (string-to-number (substring h 1) 16))
|
|
318 str
|
|
319 t
|
|
320 t))
|
|
321
|
|
322 (defun rng-uri-unescape-unibyte-match (str)
|
|
323 (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]"
|
|
324 (lambda (match)
|
|
325 (if (string= match "*")
|
|
326 "\\([^/]*\\)"
|
|
327 (regexp-quote
|
|
328 (if (= (length match) 1)
|
|
329 match
|
|
330 (string-to-number (substring match 1)
|
|
331 16)))))
|
|
332 str
|
|
333 t
|
|
334 t))
|
|
335
|
|
336 (defun rng-uri-unescape-unibyte-replace (str next-match-index)
|
|
337 (replace-regexp-in-string
|
|
338 "%[0-7][0-9a-fA-F]\\|[^%]"
|
|
339 (lambda (match)
|
|
340 (if (string= match "*")
|
|
341 (let ((n next-match-index))
|
|
342 (setq next-match-index (1+ n))
|
|
343 (format "\\%s" n))
|
|
344 (let ((ch (if (= (length match) 1)
|
|
345 (aref match 0)
|
|
346 (string-to-number (substring match 1)
|
|
347 16))))
|
|
348 (if (eq ch ?\\)
|
|
349 (string ?\\ ?\\)
|
|
350 (string ch)))))
|
|
351 str
|
|
352 t
|
|
353 t))
|
|
354
|
|
355 (provide 'rng-uri)
|
|
356
|
86379
|
357 ;; arch-tag: c7b7b8b8-61d1-48ec-82bc-7001c70b2e9d
|
86361
|
358 ;;; rng-uri.el ends here
|