Mercurial > emacs
annotate lisp/nxml/rng-uri.el @ 102453:c861e9f4fc71
*** empty log message ***
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Mon, 09 Mar 2009 01:12:39 +0000 |
parents | a9dc0e7c3f2b |
children | 1d1d5d9bd884 |
rev | line source |
---|---|
86361 | 1 ;;; rng-uri.el --- URI parsing and manipulation |
2 | |
100908 | 3 ;; Copyright (C) 2003, 2007, 2008, 2009 Free Software Foundation, Inc. |
86361 | 4 |
5 ;; Author: James Clark | |
6 ;; Keywords: XML | |
7 | |
86553 | 8 ;; This file is part of GNU Emacs. |
9 | |
94666
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
10 ;; GNU Emacs is free software: you can redistribute it and/or modify |
86553 | 11 ;; it under the terms of the GNU General Public License as published by |
94666
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
12 ;; the Free Software Foundation, either version 3 of the License, or |
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
13 ;; (at your option) any later version. |
86361 | 14 |
86553 | 15 ;; GNU Emacs is distributed in the hope that it will be useful, |
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 ;; GNU General Public License for more details. | |
86361 | 19 |
86553 | 20 ;; You should have received a copy of the GNU General Public License |
94666
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
86361 | 22 |
23 ;;; Commentary: | |
24 | |
25 ;;; Code: | |
26 | |
27 (defun rng-file-name-uri (f) | |
28 "Return a URI for the filename F. | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
29 Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to |
86361 | 30 escape them using %HH." |
31 (setq f (expand-file-name f)) | |
32 (let ((url | |
33 (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]" | |
34 'rng-percent-encode | |
35 f))) | |
36 (concat "file:" | |
37 (if (and (> (length url) 0) | |
38 (= (aref url 0) ?/)) | |
39 "//" | |
40 "///") | |
41 url))) | |
42 | |
43 (defun rng-uri-escape-multibyte (uri) | |
44 "Escape multibyte characters in URI." | |
45 (replace-regexp-in-string "[:nonascii:]" | |
46 'rng-percent-encode | |
47 (encode-coding-string uri 'utf-8))) | |
48 | |
49 (defun rng-percent-encode (str) | |
50 (apply 'concat | |
51 (mapcar (lambda (ch) | |
52 (format "%%%x%x" (/ ch 16) (% ch 16))) | |
53 (string-to-list str)))) | |
54 | |
55 | |
56 (defun rng-uri-file-name (uri) | |
57 "Return the filename represented by a URI. | |
58 Signal an error if URI is not a valid file URL." | |
59 (rng-uri-file-name-1 uri nil)) | |
60 | |
61 (defun rng-uri-pattern-file-name-regexp (pattern) | |
62 "Return a regexp for filenames represented by URIs that match PATTERN." | |
63 (rng-uri-file-name-1 pattern 'match)) | |
64 | |
65 (defun rng-uri-pattern-file-name-replace-match (pattern) | |
66 (rng-uri-file-name-1 pattern 'replace)) | |
67 | |
68 ;; pattern is either nil or match or replace | |
69 (defun rng-uri-file-name-1 (uri pattern) | |
70 (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri) | |
71 (rng-uri-error "Bad escapes in URI `%s'" uri)) | |
72 (setq uri (rng-uri-unescape-multibyte uri)) | |
73 (let* ((components | |
74 (or (rng-uri-split uri) | |
75 (rng-uri-error "Cannot split URI `%s' into its components" uri))) | |
76 (scheme (nth 0 components)) | |
77 (authority (nth 1 components)) | |
78 (path (nth 2 components)) | |
79 (absolutep (string-match "\\`/" path)) | |
80 (query (nth 3 components)) | |
81 (fragment-id (nth 4 components))) | |
82 (cond ((not scheme) | |
83 (unless pattern | |
84 (rng-uri-error "URI `%s' does not have a scheme" uri))) | |
85 ((not (string= (downcase scheme) "file")) | |
86 (rng-uri-error "URI `%s' does not use the `file:' scheme" uri))) | |
87 (when (not (member authority | |
88 (cons system-name '(nil "" "localhost")))) | |
89 (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'" | |
90 uri)) | |
91 (when query | |
92 (rng-uri-error "`?' not escaped in file URI `%s'" uri)) | |
93 (when fragment-id | |
94 (rng-uri-error "URI `%s' has a fragment identifier" uri)) | |
95 (when (string-match ";" path) | |
96 (rng-uri-error "`;' not escaped in URI `%s'" uri)) | |
97 (when (string-match "%2[fF]" path) ;; 2f is hex code of slash | |
98 (rng-uri-error "Escaped slash in URI `%s'" uri)) | |
99 (when (and (eq system-type 'windows-nt) | |
100 absolutep | |
101 (file-name-absolute-p (substring path 1))) | |
102 (setq path (substring path 1))) | |
103 (when (and pattern (string-match "\\`\\./" path)) | |
104 (setq path (substring path 2))) | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
105 (setq path |
86361 | 106 (cond ((eq pattern 'match) |
107 (rng-uri-unescape-unibyte-match path)) | |
108 ((eq pattern 'replace) | |
109 (rng-uri-unescape-unibyte-replace path 2)) | |
110 (t | |
111 (rng-uri-unescape-unibyte path)))) | |
112 (when (string-match "\000" path) | |
113 (rng-uri-error "URI `%s' has NUL character in path" uri)) | |
114 (when (eq pattern 'match) | |
115 (setq path | |
116 (concat (if absolutep | |
117 "\\(\\)" | |
118 "\\(\\(?:[^/]*/\\)*\\)") | |
119 path))) | |
120 (cond ((eq pattern 'match) | |
121 (concat "\\`" path "\\'")) | |
122 ((and (eq pattern 'replace) | |
123 (not absolutep)) | |
124 (concat "\\1" path)) | |
125 (t path)))) | |
126 | |
127 (defun rng-uri-error (&rest args) | |
128 (signal 'rng-uri-error (list (apply 'format args)))) | |
129 | |
130 (put 'rng-uri-error 'error-conditions '(error rng-uri-error)) | |
131 (put 'rng-uri-error 'error-message "Invalid URI") | |
132 | |
133 (defun rng-uri-split (str) | |
134 (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\ | |
135 \\(?://\\([^/?#]*\\)\\)?\ | |
136 \\([^?#]*\\)\ | |
137 \\(?:\\?\\([^#]*\\)\\)?\ | |
138 \\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'" | |
139 str) | |
140 (list (match-string 1 str) | |
141 (match-string 2 str) | |
142 (match-string 3 str) | |
143 (match-string 4 str) | |
144 (match-string 5 str)))) | |
145 | |
146 (defun rng-uri-join (scheme authority path &optional query fragment-id) | |
147 (when path | |
148 (let (parts) | |
149 (when fragment-id | |
150 (setq parts (list "#" fragment-id))) | |
151 (when query | |
152 (setq parts | |
153 (cons "?" | |
154 (cons query parts)))) | |
155 (setq parts (cons path parts)) | |
156 (when authority | |
157 (setq parts | |
158 (cons "//" | |
159 (cons authority parts)))) | |
160 (when scheme | |
161 (setq parts | |
162 (cons scheme | |
163 (cons ":" parts)))) | |
164 (apply 'concat parts)))) | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
165 |
86361 | 166 (defun rng-uri-resolve (uri-ref base-uri) |
167 "Resolve a possibly relative URI reference into absolute form. | |
168 URI-REF is the URI reference to be resolved. | |
169 BASE-URI is the base URI to use for resolving it. | |
170 The algorithm is specified by RFC 2396. | |
171 If there is some problem with URI-REF or BASE-URI, then | |
172 URI-REF will be returned." | |
173 (let* ((components (rng-uri-split uri-ref)) | |
174 (scheme (nth 0 components)) | |
175 (authority (nth 1 components)) | |
176 (path (nth 2 components)) | |
177 (query (nth 3 components)) | |
178 (fragment-id (nth 4 components)) | |
179 (base-components (rng-uri-split base-uri))) | |
180 (if (or (not components) | |
181 scheme | |
182 (not base-components) | |
183 (not (nth 0 base-components))) | |
184 uri-ref | |
185 (setq scheme (nth 0 base-components)) | |
186 (when (not authority) | |
187 (setq authority (nth 1 base-components)) | |
188 (if (and (equal path "") (not query)) | |
189 ;; Handle same document reference by returning | |
190 ;; same URI (RFC 2396bis does this too). | |
191 (setq path (nth 2 base-components) | |
192 query (nth 3 base-components)) | |
193 (setq path (rng-resolve-path path (nth 2 base-components))))) | |
194 (rng-uri-join scheme | |
195 authority | |
196 path | |
197 query | |
198 fragment-id)))) | |
199 | |
200 ;; See RFC 2396 5.2, steps 5 and 6 | |
201 (defun rng-resolve-path (path base-path) | |
202 ;; Step 5 | |
203 (if (or (string-match "\\`/" path) | |
204 (not (string-match "\\`/" base-path))) | |
205 path | |
206 ;; Step 6 | |
207 ;; (a), (b) | |
208 (let ((segments (rng-split-path path)) | |
209 (base-segments (rng-split-path base-path))) | |
210 (if (> (length base-segments) 1) | |
211 (setq segments (nconc (nbutlast base-segments) | |
212 segments)) | |
213 (setcar segments | |
214 (concat (car base-segments) (car segments)))) | |
215 ;; (d) | |
216 (let ((last-segment (last segments))) | |
217 (when (equal (car last-segment) ".") | |
218 (setcar last-segment ""))) | |
219 ;; (c) | |
220 (setq segments (delete "." segments)) | |
221 ;; (e) | |
222 (let (iter matched) | |
223 (while (progn | |
224 (setq matched nil) | |
225 (setq iter (cdr segments)) | |
226 (while (and iter (not matched)) | |
227 (if (or (not (equal (cadr iter) "..")) | |
228 (equal (car iter) "..")) | |
229 (setq iter (cdr iter)) | |
230 (setcar iter nil) | |
231 (setcar (cdr iter) | |
232 ;; (f) | |
233 (if (cddr iter) nil "")) | |
234 (setq matched t) | |
235 (setq segments (delq nil segments)))) | |
236 matched))) | |
237 (rng-join-path segments)))) | |
238 | |
239 (defun rng-relative-uri (full base) | |
240 "Return a URI that relative to BASE is equivalent to FULL. | |
241 The returned URI will be relative if possible. | |
242 Both FULL and BASE must be absolute URIs." | |
243 (let* ((components (rng-uri-split full)) | |
244 (scheme (nth 0 components)) | |
245 (authority (nth 1 components)) | |
246 (path (nth 2 components)) | |
247 (query (nth 3 components)) | |
248 (fragment-id (nth 4 components)) | |
249 (base-components (rng-uri-split base))) | |
250 (if (and components | |
251 base-components | |
252 scheme | |
253 (equal scheme | |
254 (nth 0 base-components))) | |
255 (progn | |
256 (setq scheme nil) | |
257 (when (and authority | |
258 (equal authority | |
259 (nth 1 base-components))) | |
260 (setq authority nil) | |
261 (setq path (rng-relative-path path (nth 2 base-components)))) | |
262 (rng-uri-join scheme authority path query fragment-id)) | |
263 full))) | |
264 | |
265 (defun rng-relative-path (path base-path) | |
266 (let ((segments (rng-split-path path)) | |
267 (base-segments (rng-split-path base-path))) | |
268 (when (> (length base-segments) 1) | |
269 (setq base-segments (nbutlast base-segments))) | |
270 (if (or (member "." segments) | |
271 (member ".." segments) | |
272 (member "." base-segments) | |
273 (member ".." base-segments)) | |
274 path | |
275 (while (and segments | |
276 base-segments | |
277 (string= (car segments) | |
278 (car base-segments))) | |
279 (setq segments (cdr segments)) | |
280 (setq base-segments (cdr base-segments))) | |
281 (while base-segments | |
282 (setq base-segments (cdr base-segments)) | |
283 (setq segments (cons ".." segments))) | |
284 (when (equal (car segments) "") | |
285 (setq segments (cons "." segments))) | |
286 (rng-join-path segments)))) | |
287 | |
288 (defun rng-split-path (path) | |
289 (let ((start 0) | |
290 segments) | |
291 (while (string-match "/" path start) | |
292 (setq segments (cons (substring path start (match-beginning 0)) | |
293 segments)) | |
294 (setq start (match-end 0))) | |
295 (nreverse (cons (substring path start) segments)))) | |
296 | |
297 (defun rng-join-path (segments) | |
298 (and segments | |
299 (mapconcat 'identity segments "/"))) | |
300 | |
301 (defun rng-uri-unescape-multibyte (str) | |
302 (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+" | |
303 'rng-multibyte-percent-decode | |
304 str)) | |
305 | |
306 (defun rng-multibyte-percent-decode (str) | |
307 (decode-coding-string (apply 'string | |
308 (mapcar (lambda (h) (string-to-number h 16)) | |
309 (split-string str "%"))) | |
310 'utf-8)) | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
311 |
86361 | 312 (defun rng-uri-unescape-unibyte (str) |
313 (replace-regexp-in-string "%[0-7][0-9a-fA-F]" | |
314 (lambda (h) | |
315 (string-to-number (substring h 1) 16)) | |
316 str | |
317 t | |
318 t)) | |
319 | |
320 (defun rng-uri-unescape-unibyte-match (str) | |
321 (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]" | |
322 (lambda (match) | |
323 (if (string= match "*") | |
324 "\\([^/]*\\)" | |
325 (regexp-quote | |
326 (if (= (length match) 1) | |
327 match | |
328 (string-to-number (substring match 1) | |
329 16))))) | |
330 str | |
331 t | |
332 t)) | |
333 | |
334 (defun rng-uri-unescape-unibyte-replace (str next-match-index) | |
335 (replace-regexp-in-string | |
336 "%[0-7][0-9a-fA-F]\\|[^%]" | |
337 (lambda (match) | |
338 (if (string= match "*") | |
339 (let ((n next-match-index)) | |
340 (setq next-match-index (1+ n)) | |
341 (format "\\%s" n)) | |
342 (let ((ch (if (= (length match) 1) | |
343 (aref match 0) | |
344 (string-to-number (substring match 1) | |
345 16)))) | |
346 (if (eq ch ?\\) | |
347 (string ?\\ ?\\) | |
348 (string ch))))) | |
349 str | |
350 t | |
351 t)) | |
352 | |
353 (provide 'rng-uri) | |
354 | |
86379 | 355 ;; arch-tag: c7b7b8b8-61d1-48ec-82bc-7001c70b2e9d |
86361 | 356 ;;; rng-uri.el ends here |