comparison lisp/xml.el @ 34825:2cad4cde52bd

(top level comment): Updated to reflect the fact that white spaces are relevant in the XML file (xml-parse-file): Do not kill an existing Emacs buffer if the file to parse was already edited. This allows for on-the-fly analysis of XML files (xml-parse-tag): Check that the casing is the same in the start tag and end tag, since XML is case-sensitive. Allows for spaces in the end tag, after the name of the tag. (xml-parse-attlist): Allow for the character '-' in the name of attributes, as in the standard http-equiv attribute Do not save the properties in the XML tree, since they are not relevant
author Gerd Moellmann <gerd@gnu.org>
date Fri, 22 Dec 2000 12:20:18 +0000
parents fd338013d333
children d1fdbba91c71
comparison
equal deleted inserted replaced
34824:6e2fee176fe7 34825:2cad4cde52bd
37 ;; 37 ;;
38 ;; It also knows how to ignore comments, as well as the special ?xml? tag 38 ;; It also knows how to ignore comments, as well as the special ?xml? tag
39 ;; in the XML file. 39 ;; in the XML file.
40 ;; 40 ;;
41 ;; The XML file should have the following format: 41 ;; The XML file should have the following format:
42 ;; <node1 attr1="name1" attr2="name2" ...> value 42 ;; <node1 attr1="name1" attr2="name2" ...>value
43 ;; <node2 attr3="name3" attr4="name4"> value2 </node2> 43 ;; <node2 attr3="name3" attr4="name4">value2</node2>
44 ;; <node3 attr5="name5" attr6="name6"> value3 </node3> 44 ;; <node3 attr5="name5" attr6="name6">value3</node3>
45 ;; </node1> 45 ;; </node1>
46 ;; Of course, the name of the nodes and attributes can be anything. There can 46 ;; Of course, the name of the nodes and attributes can be anything. There can
47 ;; be any number of attributes (or none), as well as any number of children 47 ;; be any number of attributes (or none), as well as any number of children
48 ;; below the nodes. 48 ;; below the nodes.
49 ;; 49 ;;
116 ;;** 116 ;;**
117 ;;******************************************************************* 117 ;;*******************************************************************
118 118
119 (defun xml-parse-file (file &optional parse-dtd) 119 (defun xml-parse-file (file &optional parse-dtd)
120 "Parse the well-formed XML FILE. 120 "Parse the well-formed XML FILE.
121 If FILE is already edited, this will keep the buffer alive.
121 Returns the top node with all its children. 122 Returns the top node with all its children.
122 If PARSE-DTD is non-nil, the DTD is parsed rather than skipped." 123 If PARSE-DTD is non-nil, the DTD is parsed rather than skipped."
123 (find-file file) 124 (let ((keep))
124 (let ((xml (xml-parse-region (point-min) 125 (if (get-file-buffer file)
125 (point-max) 126 (progn
126 (current-buffer) 127 (set-buffer (get-file-buffer file))
127 parse-dtd))) 128 (setq keep (point)))
128 (kill-buffer (current-buffer)) 129 (find-file file))
129 xml)) 130
131 (let ((xml (xml-parse-region (point-min)
132 (point-max)
133 (current-buffer)
134 parse-dtd)))
135 (if keep
136 (goto-char keep)
137 (kill-buffer (current-buffer)))
138 xml)))
130 139
131 (defun xml-parse-region (beg end &optional buffer parse-dtd) 140 (defun xml-parse-region (beg end &optional buffer parse-dtd)
132 "Parse the region from BEG to END in BUFFER. 141 "Parse the region from BEG to END in BUFFER.
133 If BUFFER is nil, it defaults to the current buffer. 142 If BUFFER is nil, it defaults to the current buffer.
134 Returns the XML list for the region, or raises an error if the region 143 Returns the XML list for the region, or raises an error if the region
204 '()) 213 '())
205 ;; opening tag 214 ;; opening tag
206 ((looking-at "<\\([^/> \t\n]+\\)") 215 ((looking-at "<\\([^/> \t\n]+\\)")
207 (let* ((node-name (match-string 1)) 216 (let* ((node-name (match-string 1))
208 (children (list (intern node-name))) 217 (children (list (intern node-name)))
218 (case-fold-search nil) ;; XML is case-sensitive
209 pos) 219 pos)
210 (goto-char (match-end 1)) 220 (goto-char (match-end 1))
211 221
212 ;; parses the attribute list 222 ;; parses the attribute list
213 (set 'children (append children (list (xml-parse-attlist end)))) 223 (set 'children (append children (list (xml-parse-attlist end))))
222 ;; is this a valid start tag ? 232 ;; is this a valid start tag ?
223 (if (= (char-after) ?>) 233 (if (= (char-after) ?>)
224 (progn 234 (progn
225 (forward-char 1) 235 (forward-char 1)
226 (skip-chars-forward " \t\n") 236 (skip-chars-forward " \t\n")
227 (while (not (looking-at (concat "</" node-name ">"))) 237 ;; Now check that we have the right end-tag. Note that this one might
238 ;; contain spaces after the tag name
239 (while (not (looking-at (concat "</" node-name "[ \t\n]*>")))
228 (cond 240 (cond
229 ((looking-at "</") 241 ((looking-at "</")
230 (error (concat 242 (error (concat
231 "XML: invalid syntax -- invalid end tag (expecting " 243 "XML: invalid syntax -- invalid end tag (expecting "
232 node-name 244 node-name
233 ")"))) 245 ") at pos " (number-to-string (point)))))
234 ((= (char-after) ?<) 246 ((= (char-after) ?<)
235 (set 'children (append children (list (xml-parse-tag end))))) 247 (set 'children (append children (list (xml-parse-tag end)))))
236 (t 248 (t
237 (set 'pos (point)) 249 (set 'pos (point))
238 (search-forward "<" end) 250 (search-forward "<" end)
267 The search for attributes end at the position END in the current buffer. 279 The search for attributes end at the position END in the current buffer.
268 Leaves the point on the first non-blank character after the tag." 280 Leaves the point on the first non-blank character after the tag."
269 (let ((attlist '()) 281 (let ((attlist '())
270 name) 282 name)
271 (skip-chars-forward " \t\n") 283 (skip-chars-forward " \t\n")
272 (while (looking-at "\\([a-zA-Z_:][a-zA-Z0-9.-_:]*\\)[ \t\n]*=[ \t\n]*") 284 (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[ \t\n]*=[ \t\n]*")
273 (set 'name (intern (match-string 1))) 285 (set 'name (intern (match-string 1)))
274 (goto-char (match-end 0)) 286 (goto-char (match-end 0))
275 287
276 ;; Do we have a string between quotes (or double-quotes), 288 ;; Do we have a string between quotes (or double-quotes),
277 ;; or a simple word ? 289 ;; or a simple word ?
282 ;; Each attribute must be unique within a given element 294 ;; Each attribute must be unique within a given element
283 (if (assoc name attlist) 295 (if (assoc name attlist)
284 (error "XML: each attribute must be unique within an element.")) 296 (error "XML: each attribute must be unique within an element."))
285 297
286 (set 'attlist (append attlist 298 (set 'attlist (append attlist
287 (list (cons name (match-string 1))))) 299 (list (cons name (match-string-no-properties 1)))))
288 (goto-char (match-end 0)) 300 (goto-char (match-end 0))
289 (skip-chars-forward " \t\n") 301 (skip-chars-forward " \t\n")
290 (if (> (point) end) 302 (if (> (point) end)
291 (error "XML: end of attribute list not found before end of region.")) 303 (error "XML: end of attribute list not found before end of region."))
292 ) 304 )