Mercurial > emacs
comparison lisp/xml.el @ 34825:2cad4cde52bd
(top level comment): Updated to reflect the fact that
white spaces are relevant in the XML file
(xml-parse-file): Do not kill an existing Emacs buffer if the file
to parse was already edited. This allows for on-the-fly analysis
of XML files
(xml-parse-tag): Check that the casing is the same in the start
tag and end tag, since XML is case-sensitive. Allows for spaces
in the end tag, after the name of the tag.
(xml-parse-attlist): Allow for the character '-' in the name of
attributes, as in the standard http-equiv attribute Do not save
the properties in the XML tree, since they are not relevant
author | Gerd Moellmann <gerd@gnu.org> |
---|---|
date | Fri, 22 Dec 2000 12:20:18 +0000 |
parents | fd338013d333 |
children | d1fdbba91c71 |
comparison
equal
deleted
inserted
replaced
34824:6e2fee176fe7 | 34825:2cad4cde52bd |
---|---|
37 ;; | 37 ;; |
38 ;; It also knows how to ignore comments, as well as the special ?xml? tag | 38 ;; It also knows how to ignore comments, as well as the special ?xml? tag |
39 ;; in the XML file. | 39 ;; in the XML file. |
40 ;; | 40 ;; |
41 ;; The XML file should have the following format: | 41 ;; The XML file should have the following format: |
42 ;; <node1 attr1="name1" attr2="name2" ...> value | 42 ;; <node1 attr1="name1" attr2="name2" ...>value |
43 ;; <node2 attr3="name3" attr4="name4"> value2 </node2> | 43 ;; <node2 attr3="name3" attr4="name4">value2</node2> |
44 ;; <node3 attr5="name5" attr6="name6"> value3 </node3> | 44 ;; <node3 attr5="name5" attr6="name6">value3</node3> |
45 ;; </node1> | 45 ;; </node1> |
46 ;; Of course, the name of the nodes and attributes can be anything. There can | 46 ;; Of course, the name of the nodes and attributes can be anything. There can |
47 ;; be any number of attributes (or none), as well as any number of children | 47 ;; be any number of attributes (or none), as well as any number of children |
48 ;; below the nodes. | 48 ;; below the nodes. |
49 ;; | 49 ;; |
116 ;;** | 116 ;;** |
117 ;;******************************************************************* | 117 ;;******************************************************************* |
118 | 118 |
119 (defun xml-parse-file (file &optional parse-dtd) | 119 (defun xml-parse-file (file &optional parse-dtd) |
120 "Parse the well-formed XML FILE. | 120 "Parse the well-formed XML FILE. |
121 If FILE is already edited, this will keep the buffer alive. | |
121 Returns the top node with all its children. | 122 Returns the top node with all its children. |
122 If PARSE-DTD is non-nil, the DTD is parsed rather than skipped." | 123 If PARSE-DTD is non-nil, the DTD is parsed rather than skipped." |
123 (find-file file) | 124 (let ((keep)) |
124 (let ((xml (xml-parse-region (point-min) | 125 (if (get-file-buffer file) |
125 (point-max) | 126 (progn |
126 (current-buffer) | 127 (set-buffer (get-file-buffer file)) |
127 parse-dtd))) | 128 (setq keep (point))) |
128 (kill-buffer (current-buffer)) | 129 (find-file file)) |
129 xml)) | 130 |
131 (let ((xml (xml-parse-region (point-min) | |
132 (point-max) | |
133 (current-buffer) | |
134 parse-dtd))) | |
135 (if keep | |
136 (goto-char keep) | |
137 (kill-buffer (current-buffer))) | |
138 xml))) | |
130 | 139 |
131 (defun xml-parse-region (beg end &optional buffer parse-dtd) | 140 (defun xml-parse-region (beg end &optional buffer parse-dtd) |
132 "Parse the region from BEG to END in BUFFER. | 141 "Parse the region from BEG to END in BUFFER. |
133 If BUFFER is nil, it defaults to the current buffer. | 142 If BUFFER is nil, it defaults to the current buffer. |
134 Returns the XML list for the region, or raises an error if the region | 143 Returns the XML list for the region, or raises an error if the region |
204 '()) | 213 '()) |
205 ;; opening tag | 214 ;; opening tag |
206 ((looking-at "<\\([^/> \t\n]+\\)") | 215 ((looking-at "<\\([^/> \t\n]+\\)") |
207 (let* ((node-name (match-string 1)) | 216 (let* ((node-name (match-string 1)) |
208 (children (list (intern node-name))) | 217 (children (list (intern node-name))) |
218 (case-fold-search nil) ;; XML is case-sensitive | |
209 pos) | 219 pos) |
210 (goto-char (match-end 1)) | 220 (goto-char (match-end 1)) |
211 | 221 |
212 ;; parses the attribute list | 222 ;; parses the attribute list |
213 (set 'children (append children (list (xml-parse-attlist end)))) | 223 (set 'children (append children (list (xml-parse-attlist end)))) |
222 ;; is this a valid start tag ? | 232 ;; is this a valid start tag ? |
223 (if (= (char-after) ?>) | 233 (if (= (char-after) ?>) |
224 (progn | 234 (progn |
225 (forward-char 1) | 235 (forward-char 1) |
226 (skip-chars-forward " \t\n") | 236 (skip-chars-forward " \t\n") |
227 (while (not (looking-at (concat "</" node-name ">"))) | 237 ;; Now check that we have the right end-tag. Note that this one might |
238 ;; contain spaces after the tag name | |
239 (while (not (looking-at (concat "</" node-name "[ \t\n]*>"))) | |
228 (cond | 240 (cond |
229 ((looking-at "</") | 241 ((looking-at "</") |
230 (error (concat | 242 (error (concat |
231 "XML: invalid syntax -- invalid end tag (expecting " | 243 "XML: invalid syntax -- invalid end tag (expecting " |
232 node-name | 244 node-name |
233 ")"))) | 245 ") at pos " (number-to-string (point))))) |
234 ((= (char-after) ?<) | 246 ((= (char-after) ?<) |
235 (set 'children (append children (list (xml-parse-tag end))))) | 247 (set 'children (append children (list (xml-parse-tag end))))) |
236 (t | 248 (t |
237 (set 'pos (point)) | 249 (set 'pos (point)) |
238 (search-forward "<" end) | 250 (search-forward "<" end) |
267 The search for attributes end at the position END in the current buffer. | 279 The search for attributes end at the position END in the current buffer. |
268 Leaves the point on the first non-blank character after the tag." | 280 Leaves the point on the first non-blank character after the tag." |
269 (let ((attlist '()) | 281 (let ((attlist '()) |
270 name) | 282 name) |
271 (skip-chars-forward " \t\n") | 283 (skip-chars-forward " \t\n") |
272 (while (looking-at "\\([a-zA-Z_:][a-zA-Z0-9.-_:]*\\)[ \t\n]*=[ \t\n]*") | 284 (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[ \t\n]*=[ \t\n]*") |
273 (set 'name (intern (match-string 1))) | 285 (set 'name (intern (match-string 1))) |
274 (goto-char (match-end 0)) | 286 (goto-char (match-end 0)) |
275 | 287 |
276 ;; Do we have a string between quotes (or double-quotes), | 288 ;; Do we have a string between quotes (or double-quotes), |
277 ;; or a simple word ? | 289 ;; or a simple word ? |
282 ;; Each attribute must be unique within a given element | 294 ;; Each attribute must be unique within a given element |
283 (if (assoc name attlist) | 295 (if (assoc name attlist) |
284 (error "XML: each attribute must be unique within an element.")) | 296 (error "XML: each attribute must be unique within an element.")) |
285 | 297 |
286 (set 'attlist (append attlist | 298 (set 'attlist (append attlist |
287 (list (cons name (match-string 1))))) | 299 (list (cons name (match-string-no-properties 1))))) |
288 (goto-char (match-end 0)) | 300 (goto-char (match-end 0)) |
289 (skip-chars-forward " \t\n") | 301 (skip-chars-forward " \t\n") |
290 (if (> (point) end) | 302 (if (> (point) end) |
291 (error "XML: end of attribute list not found before end of region.")) | 303 (error "XML: end of attribute list not found before end of region.")) |
292 ) | 304 ) |