86361
|
1 ;;; nxml-parse.el --- XML parser, sharing infrastructure with nxml-mode
|
|
2
|
|
3 ;; Copyright (C) 2003 Free Software Foundation, Inc.
|
|
4
|
|
5 ;; Author: James Clark
|
|
6 ;; Keywords: XML
|
|
7
|
|
8 ;; This program is free software; you can redistribute it and/or
|
|
9 ;; modify it under the terms of the GNU General Public License as
|
|
10 ;; published by the Free Software Foundation; either version 2 of
|
|
11 ;; the License, or (at your option) any later version.
|
|
12
|
|
13 ;; This program is distributed in the hope that it will be
|
|
14 ;; useful, but WITHOUT ANY WARRANTY; without even the implied
|
|
15 ;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
16 ;; PURPOSE. See the GNU General Public License for more details.
|
|
17
|
|
18 ;; You should have received a copy of the GNU General Public
|
|
19 ;; License along with this program; if not, write to the Free
|
|
20 ;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
|
21 ;; MA 02111-1307 USA
|
|
22
|
|
23 ;;; Commentary:
|
|
24
|
|
25 ;; Entry point is `nxml-parse-file'.
|
|
26
|
|
27 ;;; Code:
|
|
28
|
|
29 (require 'nxml-util)
|
|
30 (require 'xmltok)
|
|
31 (require 'nxml-enc)
|
|
32 (require 'nxml-ns)
|
|
33
|
|
(defvar nxml-parse-file-name nil
  "Name of the file currently being parsed by `nxml-parse-file'.
`nxml-parse-file' binds this for the duration of a parse; it is
passed as the file component whenever a parse error is reported.")
|
|
35
|
|
(defvar nxml-validate-function nil
  "Nil or a function to be called by `nxml-parse-file' to perform validation.
The function will be called once for each start-tag or end-tag.  The
function is passed two arguments TEXT and START-TAG.  For a start-tag,
START-TAG is a list (NAME ATTRIBUTES) where NAME and ATTRIBUTES are in
the same form as returned by `nxml-parse-file'.  For an end-tag,
START-TAG is nil.  TEXT is a string containing the text immediately
preceding the tag, or nil if there was no such text.  An empty element
is treated as a start-tag followed by an end-tag.

For a start-tag, the namespace state will be the state after
processing the namespace declarations in the start-tag.  For an
end-tag, the namespace state will be the state before popping the
namespace declarations for the corresponding start-tag.

The function must return nil if no error is detected or a
cons (MESSAGE . LOCATION) where MESSAGE is a string containing
an error message and LOCATION indicates what caused the error
as follows:

- nil indicates the tag as a whole caused it; this is always allowed;

- text indicates the text caused it; this is allowed only if
TEXT is non-nil;

- tag-close indicates the close of the tag caused it; this is
allowed only if START-TAG is non-nil;

- (attribute-name . N) indicates that the name of the Nth attribute
caused it; N counts from 0; this is allowed only if START-TAG is non-nil
and N must be less than the number of attributes;

- (attribute-value . N) indicates that the value of the Nth attribute
caused it; N counts from 0; this is allowed only if START-TAG is non-nil
and N must be less than the number of attributes.")
|
|
71
|
|
(defun nxml-parse-file (file)
  "Parse the XML document in FILE and return it as a list.
An XML element is represented as a list (NAME ATTRIBUTES . CHILDREN).
NAME is either a string, in the case where the name does not have a
namespace, or a cons (NAMESPACE . LOCAL-NAME), where NAMESPACE is a
symbol and LOCAL-NAME is a string, in the case where the name does
have a namespace.  NAMESPACE is a keyword whose name is `:URI', where
URI is the namespace name.  ATTRIBUTES is an alist of attributes where
each attribute has the form (NAME . VALUE), where NAME has the same
form as an element name, and VALUE is a string.  A namespace
declaration is represented as an attribute whose name is
\(:http://www.w3.org/2000/xmlns/ . LOCAL-NAME).  CHILDREN is a list
containing strings and child elements; CHILDREN never contains two
consecutive strings and never contains an empty string.  Processing
instructions and comments are not represented.  The return value is a
list representing the document element.

If the XML document is not well-formed, an error having the condition
`nxml-file-parse-error' will be signaled; the error data will be a
list of the form \(FILE POSITION MESSAGE), where POSITION is an integer
specifying the position where the error was detected, and MESSAGE is a
string describing the error.

The current contents of FILE will be parsed even if there is a
modified buffer currently visiting FILE.

If the variable `nxml-validate-function' is non-nil, it will be
called twice for each element, and any reported error will be signaled
in the same way as a well-formedness error."
  ;; Hold on to the scratch buffer explicitly: the cleanup form must kill
  ;; exactly that buffer, whatever buffer happens to be current when the
  ;; parse finishes or exits non-locally.
  (let ((parse-buffer (nxml-parse-find-file file)))
    (unwind-protect
        (with-current-buffer parse-buffer
          (let ((nxml-parse-file-name file))
            (nxml-parse-instance)))
      (when (buffer-live-p parse-buffer)
        (kill-buffer parse-buffer)))))
|
|
107
|
|
(defun nxml-parse-find-file (file)
  "Insert the contents of FILE into a scratch buffer and return that buffer.
The buffer is named \" *nXML Parse*\"; it is created if it does not
exist and any previous contents are erased.  While FILE is read,
`set-auto-coding-function' is bound to `nxml-set-xml-coding'.
If reading FILE fails, the scratch buffer is killed before the error
propagates to the caller."
  (let ((parse-buffer (get-buffer-create " *nXML Parse*"))
        (loaded nil))
    (unwind-protect
        (with-current-buffer parse-buffer
          (erase-buffer)
          (let ((set-auto-coding-function #'nxml-set-xml-coding))
            (insert-file-contents file))
          (setq loaded t)
          parse-buffer)
      ;; On success the caller receives the buffer and is responsible for
      ;; it.  On failure nothing is returned, so release it here rather
      ;; than leaving a stray hidden buffer behind.
      (unless loaded
        (kill-buffer parse-buffer)))))
|
|
115
|
|
(defun nxml-parse-instance ()
  "Parse the XML document in the current buffer.
With `xmltok-dtd' bound to nil and inside `xmltok-save', scan the
prolog with `xmltok-forward-prolog' and report any tokenizer errors.
Then run `nxml-parse-instance-1' inside `nxml-ns-save' to build the
document element."
  (let ((xmltok-dtd nil))
    (xmltok-save
      (xmltok-forward-prolog)
      (nxml-check-xmltok-errors)
      (nxml-ns-save
        (nxml-parse-instance-1)))))
|
|
123
|
|
(defun nxml-parse-instance-1 ()
  "Parse the tokens that follow the prolog and return the document element.
Tokens are read with `xmltok-forward' until it returns nil.  Each
element is built as a list (NAME ATTRIBUTES . CHILDREN); children are
appended destructively as they are encountered, and adjacent text
tokens are merged into a single string.

A parse error is reported for tokenizer errors, for more than one
top-level element, for an end-tag that does not match the innermost
open element, for an end-tag with no open element, for anything other
than whitespace outside the document element, for elements that are
still open when the input ends, and for a missing document element.

`nxml-validate-tag' is called for every start-tag and every end-tag;
an empty-element tag counts as a start-tag followed by an end-tag."
  (let* ((top (cons nil nil))
         ;; tail is a cons cell whose cdr is nil; additional children
         ;; are destructively appended to it
         (tail top)
         ;; stack of tails, one for each open element
         tail-stack
         ;; list of QNames of open elements, innermost first
         open-element-tags
         ;; list of strings buffering a text node, in reverse order
         text
         ;; position of beginning of first (in buffer) string in text
         text-pos)
    (while (xmltok-forward)
      (nxml-check-xmltok-errors)
      (cond ((memq xmltok-type '(start-tag end-tag empty-element))
             ;; A tag ends the pending text node: collapse the buffered
             ;; pieces into one string and append it as a child.
             (when text
               (setq text (apply #'concat (nreverse text)))
               (setcdr tail (cons text nil))
               (setq tail (cdr tail)))
             ;; Opening half (start-tag or empty-element).
             (when (not (eq xmltok-type 'end-tag))
               ;; With nothing open, tail only differs from top once a
               ;; document element has already been appended.
               (when (and (not open-element-tags)
                          (not (eq tail top)))
                 (nxml-parse-error nil "Multiple top-level elements"))
               (push (xmltok-start-tag-qname) open-element-tags)
               (nxml-ns-push-state)
               (let ((tag (nxml-parse-start-tag)))
                 (nxml-validate-tag text text-pos tag)
                 (setq text nil)
                 (setcdr tail (cons tag nil))
                 (setq tail (cdr tail))
                 ;; Remember where siblings continue in the parent, then
                 ;; start appending children to the new element.
                 (push tail tail-stack)
                 (setq tail (last tag))))
             ;; Closing half (end-tag or empty-element).
             (when (not (eq xmltok-type 'start-tag))
               (or (eq xmltok-type 'empty-element)
                   (equal (car open-element-tags)
                          (xmltok-end-tag-qname))
                   (if open-element-tags
                       (nxml-parse-error nil
                                         "Unbalanced end-tag; expected </%s>"
                                         (car open-element-tags))
                     (nxml-parse-error nil "Extra end-tag")))
               (nxml-validate-tag text text-pos nil)
               (setq text nil)
               (nxml-ns-pop-state)
               (setq open-element-tags (cdr open-element-tags))
               (setq tail (car tail-stack))
               (setq tail-stack (cdr tail-stack)))
             (setq text-pos nil))
            ((memq xmltok-type '(space data entity-ref char-ref cdata-section))
             (cond (open-element-tags
                    (unless text-pos
                      (setq text-pos xmltok-start))
                    (setq text
                          (cons (nxml-current-text-string) text)))
                   ;; Outside the document element only whitespace is
                   ;; permitted, and it is discarded.
                   ((not (eq xmltok-type 'space))
                    (nxml-parse-error
                     nil
                     "%s at top-level"
                     (cdr (assq xmltok-type
                                '((data . "Text characters")
                                  (entity-ref . "Entity reference")
                                  (char-ref . "Character reference")
                                  (cdata-section . "CDATA section"))))))))))
    ;; Input is exhausted.  Any element still on the stack never saw its
    ;; end-tag, so the document is not well-formed.
    (when open-element-tags
      (nxml-parse-error (point-max)
                        "Missing end-tag; expected </%s>"
                        (car open-element-tags)))
    (unless (cdr top)
      (nxml-parse-error (point-max) "Missing document element"))
    (cadr top)))
|
|
193
|
|
(defun nxml-parse-start-tag ()
  "Return a list (NAME ATTRIBUTES) for the tag described by the xmltok state.
The namespace declarations in `xmltok-namespace-attributes' are
processed first, updating the default namespace or a prefix binding
for each one.  The element name and the names of the attributes in
`xmltok-attributes' are then resolved against those bindings.

ATTRIBUTES is an alist holding the ordinary attributes followed by the
namespace declarations, each group in the same order as in
`xmltok-attributes' and `xmltok-namespace-attributes' respectively.

A parse error is reported for an attribute with no usable value, a
duplicate namespace declaration, an attempt to undeclare a prefix, an
undeclared prefix, or a duplicate attribute."
  (let (seen-prefixes ns-attributes attributes element-name)
    ;; Pass 1: namespace declarations.  These must take effect before
    ;; any prefix used in this same tag is looked up.
    (dolist (decl xmltok-namespace-attributes)
      (let* ((uri (or (xmltok-attribute-value decl)
                      (nxml-parse-error nil "Invalid attribute value")))
             (ns (nxml-make-namespace uri))
             ;; nil stands for the default-namespace declaration
             (prefix (and (xmltok-attribute-prefix decl)
                          (xmltok-attribute-local-name decl))))
        (cond ((member prefix seen-prefixes)
               (nxml-parse-error nil "Duplicate namespace declaration"))
              ((null prefix)
               (nxml-ns-set-default ns))
              (ns
               (nxml-ns-set-prefix prefix ns))
              (t
               (nxml-parse-error nil "Cannot undeclare namespace prefix")))
        (push prefix seen-prefixes)
        (push (cons (nxml-make-name nxml-xmlns-namespace-uri
                                    (xmltok-attribute-local-name decl))
                    uri)
              ns-attributes)))
    ;; The element's own name: a prefixed name needs a declared prefix,
    ;; an unprefixed one takes the default namespace.
    (setq element-name
          (nxml-make-name
           (let ((prefix (xmltok-start-tag-prefix)))
             (if prefix
                 (or (nxml-ns-get-prefix prefix)
                     (nxml-parse-error (1+ xmltok-start)
                                       "Prefix `%s' undeclared"
                                       prefix))
               (nxml-ns-get-default)))
           (xmltok-start-tag-local-name)))
    ;; Pass 2: ordinary attributes.  Here an unprefixed name is given no
    ;; namespace at all.
    (dolist (att xmltok-attributes)
      (let* ((prefix (xmltok-attribute-prefix att))
             (ns (and prefix
                      (or (nxml-ns-get-prefix prefix)
                          (nxml-parse-error (xmltok-attribute-name-start att)
                                            "Prefix `%s' undeclared"
                                            prefix))))
             (att-name (nxml-make-name ns
                                       (xmltok-attribute-local-name att))))
        (when (assoc att-name attributes)
          (nxml-parse-error (xmltok-attribute-name-start att)
                            "Duplicate attribute"))
        (push (cons att-name
                    (or (xmltok-attribute-value att)
                        (nxml-parse-error nil "Invalid attribute value")))
              attributes)))
    ;; Both accumulators are in reverse order.  Joining the namespace
    ;; declarations in front and reversing the whole puts the ordinary
    ;; attributes first, each group in document order.
    (list element-name
          (nreverse (nconc ns-attributes attributes)))))
|
|
257
|
|
(defun nxml-validate-tag (text text-pos tag)
  "Run `nxml-validate-function', if set, on the tag just scanned.
TEXT is the text immediately preceding the tag, or nil.  TEXT-POS is
the buffer position where that text starts; it is used only when TEXT
is non-nil.  TAG is the list (NAME ATTRIBUTES) for a start-tag and nil
for an end-tag.

If the validation function returns non-nil, the value must be a cons
\(MESSAGE . LOCATION).  LOCATION is translated into a buffer position
by `nxml-validate-error-position' and MESSAGE is reported as a parse
error at that position.  If the value is not a cons, or LOCATION cannot
be translated, an ordinary error naming the validation function is
signaled instead."
  (when nxml-validate-function
    (let ((err (funcall nxml-validate-function text tag)))
      (when err
        (let ((pos (and (consp err)
                        (nxml-validate-error-position (cdr err)
                                                      (and text text-pos)
                                                      tag))))
          (unless pos
            (error "Incorrect return value from %s"
                   nxml-validate-function))
          ;; MESSAGE is data supplied by the validator, not a format
          ;; template: pass it through "%s" so that a literal `%' in it
          ;; is reported verbatim instead of being interpreted.
          (nxml-parse-error pos "%s" (car err)))))))
|
|
269
|
|
(defun nxml-validate-error-position (location text-pos tag)
  "Translate LOCATION from a validation function into a buffer position.
LOCATION is the cdr of the value returned by `nxml-validate-function'.
TEXT-POS is the start of the text preceding the tag, or nil if there
was none.  TAG is non-nil for a start-tag and nil for an end-tag.

Return `xmltok-start' when LOCATION is nil, TEXT-POS when it is the
symbol `text', and the position of the tag's closing delimiter when it
is `tag-close' and TAG is non-nil.  For (attribute-name . N) or
\(attribute-value . N), return the start of the name or value of the
Nth attribute, where N indexes `xmltok-attributes' followed by
`xmltok-namespace-attributes'.

Return nil when LOCATION cannot be honored, including an N that is not
a non-negative integer or is out of range."
  (cond ((null location) xmltok-start)
        ((eq location 'text) text-pos)
        ((eq location 'tag-close)
         ;; An empty-element tag closes with two characters, any other
         ;; tag with one.
         (and tag (- (point) (if (eq xmltok-type 'empty-element) 2 1))))
        ((consp location)
         (let* ((kind (car location))
                (n (cdr location))
                ;; `nth' treats a negative index like 0 and rejects
                ;; non-integers, so check N before using it.
                (att (and (natnump n)
                          (or (nth n xmltok-attributes)
                              (nth (- n (length xmltok-attributes))
                                   xmltok-namespace-attributes)))))
           ;; A one-element clause ((not att)) would return t here; the
           ;; caller needs nil to recognize an invalid location.
           (and att
                (cond ((eq kind 'attribute-name)
                       (xmltok-attribute-name-start att))
                      ((eq kind 'attribute-value)
                       (xmltok-attribute-value-start att))))))))
|
|
285
|
|
(defun nxml-make-name (ns local-name)
  "Return the name representing LOCAL-NAME in namespace NS.
The result is the cons (NS . LOCAL-NAME) when NS is non-nil, and
LOCAL-NAME itself when NS is nil."
  (cond (ns (cons ns local-name))
        (t local-name)))
|
|
290
|
|
(defun nxml-current-text-string ()
  "Return the text contributed by the token described by the xmltok state.
For `space' and `data' tokens this is the buffer text from
`xmltok-start' to point.  For a `cdata-section' it is the same span
without the first 9 and the last 3 characters.  For `char-ref' and
`entity-ref' tokens it is `xmltok-replacement'; a parse error is
reported when that is nil.  Return nil for any other token type."
  (if (memq xmltok-type '(char-ref entity-ref))
      (or xmltok-replacement
          (nxml-parse-error nil
                            (if (eq xmltok-type 'char-ref)
                                "Reference to unsupported Unicode character"
                              "Unresolvable entity reference")))
    (let ((cdata (eq xmltok-type 'cdata-section)))
      (when (or cdata (memq xmltok-type '(space data)))
        ;; 9 and 3 are the lengths of "<![CDATA[" and "]]>".
        (buffer-substring-no-properties
         (if cdata (+ xmltok-start 9) xmltok-start)
         (if cdata (- (point) 3) (point)))))))
|
|
305
|
|
(defun nxml-parse-error (position &rest args)
  "Report a parse error in the file being parsed.
ARGS are a format string followed by its arguments, as for `format'.
The error is located at POSITION, or at `xmltok-start' when POSITION is
nil.  The file name is taken from `nxml-parse-file-name', and the
report is made through `nxml-signal-file-parse-error'."
  (let ((where (or position xmltok-start))
        (text (apply #'format args)))
    (nxml-signal-file-parse-error nxml-parse-file-name where text)))
|
|
310
|
|
(defun nxml-check-xmltok-errors ()
  "Report the last element of `xmltok-errors' as a file parse error.
Do nothing when `xmltok-errors' is nil.  The position and message are
read from the error object with `xmltok-error-start' and
`xmltok-error-message'; the file name is `nxml-parse-file-name'."
  (unless (null xmltok-errors)
    (let* ((err (car (last xmltok-errors)))
           (pos (xmltok-error-start err))
           (msg (xmltok-error-message err)))
      (nxml-signal-file-parse-error nxml-parse-file-name pos msg))))
|
|
317
|
|
318 (provide 'nxml-parse)
|
|
319
|
|
320 ;;; nxml-parse.el ends here
|