comparison lisp/xml.el @ 54877:1bf7ef48f54f

(xml-maybe-do-ns): New function to handle namespace parsing of both attribute and element names. (xml-ns-parse-ns-attrs, xml-ns-expand-el, xml-ns-expand-attr) (xml-intern-attrlist): Remove in favor of xml-maybe-do-ns. (xml-parse-tag): Update assumed namespaces. Clean up namespace parsing. (xml-parse-attlist): Make it do its own namespace parsing.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Wed, 14 Apr 2004 18:36:14 +0000
parents 586ffda6e9f9
children 9204ad91984c
comparison
equal deleted inserted replaced
54876:95ee18354a3a 54877:1bf7ef48f54f
50 ;; 50 ;;
51 ;; There can be only top level node, but with any number of children below. 51 ;; There can be only top level node, but with any number of children below.
52 52
53 ;;; LIST FORMAT 53 ;;; LIST FORMAT
54 54
55 ;; The functions `xml-parse-file' and `xml-parse-tag' return a list with 55 ;; The functions `xml-parse-file', `xml-parse-region' and
56 ;; the following format: 56 ;; `xml-parse-tag' return a list with the following format:
57 ;; 57 ;;
58 ;; xml-list ::= (node node ...) 58 ;; xml-list ::= (node node ...)
59 ;; node ::= (tag_name attribute-list . child_node_list) 59 ;; node ::= (qname attribute-list . child_node_list)
60 ;; child_node_list ::= child_node child_node ... 60 ;; child_node_list ::= child_node child_node ...
61 ;; child_node ::= node | string 61 ;; child_node ::= node | string
62 ;; tag_name ::= string 62 ;; qname ::= (:namespace-uri . "name") | "name"
63 ;; attribute_list ::= (("attribute" . "value") ("attribute" . "value") ...) 63 ;; attribute_list ::= ((qname . "value") (qname . "value") ...)
64 ;; | nil 64 ;; | nil
65 ;; string ::= "..." 65 ;; string ::= "..."
66 ;; 66 ;;
67 ;; Some macros are provided to ease the parsing of this list. 67 ;; Some macros are provided to ease the parsing of this list.
68 ;; Whitespace is preserved. Fixme: There should be a tree-walker that 68 ;; Whitespace is preserved. Fixme: There should be a tree-walker that
69 ;; can remove it. 69 ;; can remove it.
70
71 ;; TODO:
72 ;; * xml:base, xml:space support
73 ;; * more complete DOCTYPE parsing
74 ;; * pi support
70 75
71 ;;; Code: 76 ;;; Code:
72 77
73 ;; Note that {buffer-substring,match-string}-no-properties were 78 ;; Note that {buffer-substring,match-string}-no-properties were
74 ;; formerly used in several places, but that removes composition info. 79 ;; formerly used in several places, but that removes composition info.
228 (goto-char (point-max)))) 233 (goto-char (point-max))))
229 (if parse-dtd 234 (if parse-dtd
230 (cons dtd (nreverse xml)) 235 (cons dtd (nreverse xml))
231 (nreverse xml))))))) 236 (nreverse xml)))))))
232 237
233 (defun xml-ns-parse-ns-attrs (attr-list &optional xml-ns) 238 (defun xml-maybe-do-ns (name default xml-ns)
234 "Parse the namespace attributes and return a list of cons in the form: 239 "Perform any namespace expansion. NAME is the name to perform the expansion on.
235 \(namespace . prefix)" 240 DEFAULT is the default namespace. XML-NS is a cons of namespace
236 241 names to uris. When namespace-aware parsing is off, then XML-NS
237 (mapcar 242 is nil.
238 (lambda (attr) 243
239 (let* ((splitup (split-string (car attr) ":")) 244 During namespace-aware parsing, any name without a namespace is
240 (prefix (nth 0 splitup)) 245 put into the namespace identified by DEFAULT. nil is used to
241 (lname (nth 1 splitup))) 246 specify that the name shouldn't be given a namespace."
242 (when (string= "xmlns" prefix) 247 (if (consp xml-ns)
243 (push (cons (if lname 248 (let* ((nsp (string-match ":" name))
244 lname 249 (lname (if nsp (substring name (match-end 0)) name))
245 "") 250 (prefix (if nsp (substring name 0 (match-beginning 0)) default))
246 (cdr attr)) 251 (special (and (string-equal lname "xmlns") (not prefix)))
247 xml-ns)))) attr-list) 252 ;; Setting default to nil will insure that there is not
248 xml-ns) 253 ;; matching cons in xml-ns. In which case we
249 254 (ns (or (cdr (assoc (if special "xmlns" prefix)
250 ;; expand element names 255 xml-ns))
251 (defun xml-ns-expand-el (el xml-ns) 256 :)))
252 "Expand the XML elements from \"prefix:local-name\" to a cons in the form 257 (cons ns (if special "" lname)))
253 \"(namespace . local-name)\"." 258 (intern name)))
254
255 (let* ((splitup (split-string el ":"))
256 (lname (or (nth 1 splitup)
257 (nth 0 splitup)))
258 (prefix (if (nth 1 splitup)
259 (nth 0 splitup)
260 (if (string= lname "xmlns")
261 "xmlns"
262 "")))
263 (ns (cdr (assoc-string prefix xml-ns))))
264 (if (string= "" ns)
265 lname
266 (cons (intern (concat ":" ns))
267 lname))))
268
269 ;; expand attribute names
270 (defun xml-ns-expand-attr (attr-list xml-ns)
271 "Expand the attribute list for a particular element from the form
272 \"prefix:local-name\" to the form \"{namespace}:local-name\"."
273
274 (mapcar
275 (lambda (attr)
276 (let* ((splitup (split-string (car attr) ":"))
277 (lname (or (nth 1 splitup)
278 (nth 0 splitup)))
279 (prefix (if (nth 1 splitup)
280 (nth 0 splitup)
281 (if (string= (car attr) "xmlns")
282 "xmlns"
283 "")))
284 (ns (cdr (assoc-string prefix xml-ns))))
285 (setcar attr
286 (if (string= "" ns)
287 lname
288 (cons (intern (concat ":" ns))
289 lname)))))
290 attr-list)
291 attr-list)
292
293 (defun xml-intern-attrlist (attr-list)
294 "Convert attribute names to symbols for backward compatibility."
295 (mapcar (lambda (attr)
296 (setcar attr (intern (car attr))))
297 attr-list)
298 attr-list)
299 259
300 (defun xml-parse-tag (&optional parse-dtd parse-ns) 260 (defun xml-parse-tag (&optional parse-dtd parse-ns)
301 "Parse the tag at point. 261 "Parse the tag at point.
302 If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and 262 If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
303 returned as the first element in the list. 263 returned as the first element in the list.
308 - a pair : the first element is the DTD, the second is the node." 268 - a pair : the first element is the DTD, the second is the node."
309 (let ((xml-ns (if (consp parse-ns) 269 (let ((xml-ns (if (consp parse-ns)
310 parse-ns 270 parse-ns
311 (if parse-ns 271 (if parse-ns
312 (list 272 (list
313 ;; Default no namespace 273 ;; Default for empty prefix is no namespace
314 (cons "" "") 274 (cons "" :)
275 ;; "xml" namespace
276 (cons "xml" :http://www.w3.org/XML/1998/namespace)
315 ;; We need to seed the xmlns namespace 277 ;; We need to seed the xmlns namespace
316 (cons "xmlns" "http://www.w3.org/2000/xmlns/")))))) 278 (cons "xmlns" :http://www.w3.org/2000/xmlns/))))))
317 (cond 279 (cond
318 ;; Processing instructions (like the <?xml version="1.0"?> tag at the 280 ;; Processing instructions (like the <?xml version="1.0"?> tag at the
319 ;; beginning of a document). 281 ;; beginning of a document).
320 ((looking-at "<\\?") 282 ((looking-at "<\\?")
321 (search-forward "?>") 283 (search-forward "?>")
348 ((looking-at "<\\([^/>[:space:]]+\\)") 310 ((looking-at "<\\([^/>[:space:]]+\\)")
349 (goto-char (match-end 1)) 311 (goto-char (match-end 1))
350 312
351 ;; Parse this node 313 ;; Parse this node
352 (let* ((node-name (match-string 1)) 314 (let* ((node-name (match-string 1))
353 (attr-list (xml-parse-attlist)) 315 ;; Parse the attribute list.
354 (children (if (consp xml-ns) ;; take care of namespace parsing 316 (attrs (xml-parse-attlist xml-ns))
355 (progn 317 children pos)
356 (setq xml-ns (xml-ns-parse-ns-attrs 318
357 attr-list xml-ns)) 319 ;; add the xmlns:* attrs to our cache
358 (list (xml-ns-expand-attr 320 (when (consp xml-ns)
359 attr-list xml-ns) 321 (dolist (attr attrs)
360 (xml-ns-expand-el 322 (when (and (consp (car attr))
361 node-name xml-ns))) 323 (eq :http://www.w3.org/2000/xmlns/
362 (list (xml-intern-attrlist attr-list) 324 (caar attr)))
363 (intern node-name)))) 325 (push (cons (cdar attr) (intern (concat ":" (cdr attr))))
364 pos) 326 xml-ns))))
365 327
328 ;; expand element names
329 (setq node-name (list (xml-maybe-do-ns node-name "" xml-ns)))
330
331 (setq children (list attrs node-name))
366 ;; is this an empty element ? 332 ;; is this an empty element ?
367 (if (looking-at "/>") 333 (if (looking-at "/>")
368 (progn 334 (progn
369 (forward-char 2) 335 (forward-char 2)
370 (nreverse children)) 336 (nreverse children))
414 ;; This was an invalid start tag 380 ;; This was an invalid start tag
415 (error "XML: Invalid attribute list"))))) 381 (error "XML: Invalid attribute list")))))
416 (t ;; This is not a tag. 382 (t ;; This is not a tag.
417 (error "XML: Invalid character"))))) 383 (error "XML: Invalid character")))))
418 384
419 (defun xml-parse-attlist () 385 (defun xml-parse-attlist (&optional xml-ns)
420 "Return the attribute-list after point. Leave point at the 386 "Return the attribute-list after point. Leave point at the
421 first non-blank character after the tag." 387 first non-blank character after the tag."
422 (let ((attlist ()) 388 (let ((attlist ())
423 end-pos name) 389 end-pos name)
424 (skip-syntax-forward " ") 390 (skip-syntax-forward " ")
425 (while (looking-at (eval-when-compile 391 (while (looking-at (eval-when-compile
426 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) 392 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*")))
427 (setq name (match-string 1)) 393 (setq end-pos (match-end 0))
428 (goto-char (match-end 0)) 394 (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns))
395 (goto-char end-pos)
429 396
430 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize 397 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
431 398
432 ;; Do we have a string between quotes (or double-quotes), 399 ;; Do we have a string between quotes (or double-quotes),
433 ;; or a simple word ? 400 ;; or a simple word ?