comparison lisp/xml.el @ 89909:68c22ea6027c

Sync to HEAD
author Kenichi Handa <handa@m17n.org>
date Fri, 16 Apr 2004 12:51:06 +0000
parents 375f2633d815
children 4c90ffeb71c5
comparison
equal deleted inserted replaced
89908:ee1402f7b568 89909:68c22ea6027c
1 ;;; xml.el --- XML parser 1 ;;; xml.el --- XML parser
2 2
3 ;; Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc. 3 ;; Copyright (C) 2000, 01, 03, 2004 Free Software Foundation, Inc.
4 4
5 ;; Author: Emmanuel Briot <briot@gnat.com> 5 ;; Author: Emmanuel Briot <briot@gnat.com>
6 ;; Maintainer: Mark A. Hershberger <mah@everybody.org> 6 ;; Maintainer: Mark A. Hershberger <mah@everybody.org>
7 ;; Keywords: xml, data 7 ;; Keywords: xml, data
8 8
50 ;; 50 ;;
51 ;; There can be only top level node, but with any number of children below. 51 ;; There can be only top level node, but with any number of children below.
52 52
53 ;;; LIST FORMAT 53 ;;; LIST FORMAT
54 54
55 ;; The functions `xml-parse-file' and `xml-parse-tag' return a list with 55 ;; The functions `xml-parse-file', `xml-parse-region' and
56 ;; the following format: 56 ;; `xml-parse-tag' return a list with the following format:
57 ;; 57 ;;
58 ;; xml-list ::= (node node ...) 58 ;; xml-list ::= (node node ...)
59 ;; node ::= (tag_name attribute-list . child_node_list) 59 ;; node ::= (qname attribute-list . child_node_list)
60 ;; child_node_list ::= child_node child_node ... 60 ;; child_node_list ::= child_node child_node ...
61 ;; child_node ::= node | string 61 ;; child_node ::= node | string
62 ;; tag_name ::= string 62 ;; qname ::= (:namespace-uri . "name") | "name"
63 ;; attribute_list ::= (("attribute" . "value") ("attribute" . "value") ...) 63 ;; attribute_list ::= ((qname . "value") (qname . "value") ...)
64 ;; | nil 64 ;; | nil
65 ;; string ::= "..." 65 ;; string ::= "..."
66 ;; 66 ;;
67 ;; Some macros are provided to ease the parsing of this list. 67 ;; Some macros are provided to ease the parsing of this list.
68 ;; Whitespace is preserved. Fixme: There should be a tree-walker that 68 ;; Whitespace is preserved. Fixme: There should be a tree-walker that
69 ;; can remove it. 69 ;; can remove it.
70
71 ;; TODO:
72 ;; * xml:base, xml:space support
73 ;; * more complete DOCTYPE parsing
74 ;; * pi support
70 75
71 ;;; Code: 76 ;;; Code:
72 77
73 ;; Note that {buffer-substring,match-string}-no-properties were 78 ;; Note that {buffer-substring,match-string}-no-properties were
74 ;; formerly used in several places, but that removes composition info. 79 ;; formerly used in several places, but that removes composition info.
102 (if child 107 (if child
103 (if (equal (xml-node-name child) child-name) 108 (if (equal (xml-node-name child) child-name)
104 (push child match)))) 109 (push child match))))
105 (nreverse match))) 110 (nreverse match)))
106 111
107 (defun xml-get-attribute (node attribute) 112 (defun xml-get-attribute-or-nil (node attribute)
108 "Get from NODE the value of ATTRIBUTE. 113 "Get from NODE the value of ATTRIBUTE.
109 An empty string is returned if the attribute was not found." 114 Return `nil' if the attribute was not found.
110 (if (xml-node-attributes node) 115
111 (let ((value (assoc attribute (xml-node-attributes node)))) 116 See also `xml-get-attribute'."
112 (if value 117 (cdr (assoc attribute (xml-node-attributes node))))
113 (cdr value) 118
114 "")) 119 (defsubst xml-get-attribute (node attribute)
115 "")) 120 "Get from NODE the value of ATTRIBUTE.
121 An empty string is returned if the attribute was not found.
122
123 See also `xml-get-attribute-or-nil'."
124 (or (xml-get-attribute-or-nil node attribute) ""))
116 125
117 ;;******************************************************************* 126 ;;*******************************************************************
118 ;;** 127 ;;**
119 ;;** Creating the list 128 ;;** Creating the list
120 ;;** 129 ;;**
206 (goto-char (point-min)) 215 (goto-char (point-min))
207 (while (not (eobp)) 216 (while (not (eobp))
208 (if (search-forward "<" nil t) 217 (if (search-forward "<" nil t)
209 (progn 218 (progn
210 (forward-char -1) 219 (forward-char -1)
211 (if xml 220 (setq result (xml-parse-tag parse-dtd parse-ns))
221 (if (and xml result)
212 ;; translation of rule [1] of XML specifications 222 ;; translation of rule [1] of XML specifications
213 (error "XML files can have only one toplevel tag") 223 (error "XML files can have only one toplevel tag")
214 (setq result (xml-parse-tag parse-dtd parse-ns))
215 (cond 224 (cond
216 ((null result)) 225 ((null result))
217 ((listp (car result)) 226 ((and (listp (car result))
227 parse-dtd)
218 (setq dtd (car result)) 228 (setq dtd (car result))
219 (if (cdr result) ; possible leading comment 229 (if (cdr result) ; possible leading comment
220 (add-to-list 'xml (cdr result)))) 230 (add-to-list 'xml (cdr result))))
221 (t 231 (t
222 (add-to-list 'xml result))))) 232 (add-to-list 'xml result)))))
223 (goto-char (point-max)))) 233 (goto-char (point-max))))
224 (if parse-dtd 234 (if parse-dtd
225 (cons dtd (nreverse xml)) 235 (cons dtd (nreverse xml))
226 (nreverse xml))))))) 236 (nreverse xml)))))))
227 237
238 (defun xml-maybe-do-ns (name default xml-ns)
239 "Perform any namespace expansion. NAME is the name to perform the expansion on.
240 DEFAULT is the default namespace. XML-NS is a cons of namespace
241 names to uris. When namespace-aware parsing is off, then XML-NS
242 is nil.
243
244 During namespace-aware parsing, any name without a namespace is
245 put into the namespace identified by DEFAULT. nil is used to
246 specify that the name shouldn't be given a namespace."
247 (if (consp xml-ns)
248 (let* ((nsp (string-match ":" name))
249 (lname (if nsp (substring name (match-end 0)) name))
250 (prefix (if nsp (substring name 0 (match-beginning 0)) default))
251 (special (and (string-equal lname "xmlns") (not prefix)))
252 ;; Setting default to nil will insure that there is not
253 ;; matching cons in xml-ns. In which case we
254 (ns (or (cdr (assoc (if special "xmlns" prefix)
255 xml-ns))
256 :)))
257 (cons ns (if special "" lname)))
258 (intern name)))
228 259
229 (defun xml-parse-tag (&optional parse-dtd parse-ns) 260 (defun xml-parse-tag (&optional parse-dtd parse-ns)
230 "Parse the tag at point. 261 "Parse the tag at point.
231 If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and 262 If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
232 returned as the first element in the list. 263 returned as the first element in the list.
237 - a pair : the first element is the DTD, the second is the node." 268 - a pair : the first element is the DTD, the second is the node."
238 (let ((xml-ns (if (consp parse-ns) 269 (let ((xml-ns (if (consp parse-ns)
239 parse-ns 270 parse-ns
240 (if parse-ns 271 (if parse-ns
241 (list 272 (list
242 ;; Default no namespace 273 ;; Default for empty prefix is no namespace
243 (cons "" "") 274 (cons "" :)
275 ;; "xml" namespace
276 (cons "xml" :http://www.w3.org/XML/1998/namespace)
244 ;; We need to seed the xmlns namespace 277 ;; We need to seed the xmlns namespace
245 (cons "xmlns" "http://www.w3.org/2000/xmlns/")))))) 278 (cons "xmlns" :http://www.w3.org/2000/xmlns/))))))
246 (cond 279 (cond
247 ;; Processing instructions (like the <?xml version="1.0"?> tag at the 280 ;; Processing instructions (like the <?xml version="1.0"?> tag at the
248 ;; beginning of a document). 281 ;; beginning of a document).
249 ((looking-at "<\\?") 282 ((looking-at "<\\?")
250 (search-forward "?>") 283 (search-forward "?>")
274 ((looking-at "</") 307 ((looking-at "</")
275 '()) 308 '())
276 ;; opening tag 309 ;; opening tag
277 ((looking-at "<\\([^/>[:space:]]+\\)") 310 ((looking-at "<\\([^/>[:space:]]+\\)")
278 (goto-char (match-end 1)) 311 (goto-char (match-end 1))
312
313 ;; Parse this node
279 (let* ((node-name (match-string 1)) 314 (let* ((node-name (match-string 1))
280 ;; Parse the attribute list. 315 ;; Parse the attribute list.
281 (children (list (xml-parse-attlist) (intern node-name))) 316 (attrs (xml-parse-attlist xml-ns))
282 pos) 317 children pos)
283 318
284 ;; add the xmlns:* attrs to our cache 319 ;; add the xmlns:* attrs to our cache
285 (when (consp xml-ns) 320 (when (consp xml-ns)
286 (mapcar 321 (dolist (attr attrs)
287 (lambda (attr) 322 (when (and (consp (car attr))
288 (let* ((splitup (split-string (symbol-name (car attr)) ":")) 323 (eq :http://www.w3.org/2000/xmlns/
289 (prefix (nth 0 splitup)) 324 (caar attr)))
290 (lname (nth 1 splitup))) 325 (push (cons (cdar attr) (intern (concat ":" (cdr attr))))
291 (when (string= "xmlns" prefix) 326 xml-ns))))
292 (setq xml-ns (append (list (cons (if lname 327
293 lname 328 ;; expand element names
294 "") 329 (setq node-name (list (xml-maybe-do-ns node-name "" xml-ns)))
295 (cdr attr))) 330
296 xml-ns))))) 331 (setq children (list attrs node-name))
297 (car children))
298
299 ;; expand element names
300 (let* ((splitup (split-string (symbol-name (cadr children)) ":"))
301 (lname (or (nth 1 splitup)
302 (nth 0 splitup)))
303 (prefix (if (nth 1 splitup)
304 (nth 0 splitup)
305 "")))
306 (setcdr children (list
307 (intern (concat "{"
308 (cdr (assoc-string prefix xml-ns))
309 "}" lname)))))
310
311 ;; expand attribute names
312 (mapcar
313 (lambda (attr)
314 (let* ((splitup (split-string (symbol-name (car attr)) ":"))
315 (lname (or (nth 1 splitup)
316 (nth 0 splitup)))
317 (prefix (if (nth 1 splitup)
318 (nth 0 splitup)
319 (caar xml-ns))))
320
321 (setcar attr (intern (concat "{"
322 (cdr (assoc-string prefix xml-ns))
323 "}" lname)))))
324 (car children)))
325
326 ;; is this an empty element ? 332 ;; is this an empty element ?
327 (if (looking-at "/>") 333 (if (looking-at "/>")
328 (progn 334 (progn
329 (forward-char 2) 335 (forward-char 2)
330 (nreverse children)) 336 (nreverse children))
374 ;; This was an invalid start tag 380 ;; This was an invalid start tag
375 (error "XML: Invalid attribute list"))))) 381 (error "XML: Invalid attribute list")))))
376 (t ;; This is not a tag. 382 (t ;; This is not a tag.
377 (error "XML: Invalid character"))))) 383 (error "XML: Invalid character")))))
378 384
379 (defun xml-parse-attlist () 385 (defun xml-parse-attlist (&optional xml-ns)
380 "Return the attribute-list after point.Leave point at the first non-blank character after the tag." 386 "Return the attribute-list after point. Leave point at the
387 first non-blank character after the tag."
381 (let ((attlist ()) 388 (let ((attlist ())
382 start-pos name) 389 end-pos name)
383 (skip-syntax-forward " ") 390 (skip-syntax-forward " ")
384 (while (looking-at (eval-when-compile 391 (while (looking-at (eval-when-compile
385 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) 392 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*")))
386 (setq name (intern (match-string 1))) 393 (setq end-pos (match-end 0))
387 (goto-char (match-end 0)) 394 (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns))
395 (goto-char end-pos)
388 396
389 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize 397 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
390 398
391 ;; Do we have a string between quotes (or double-quotes), 399 ;; Do we have a string between quotes (or double-quotes),
392 ;; or a simple word ? 400 ;; or a simple word ?
393 (if (looking-at "\"\\([^\"]*\\)\"") 401 (if (looking-at "\"\\([^\"]*\\)\"")
394 (setq start-pos (match-beginning 0)) 402 (setq end-pos (match-end 0))
395 (if (looking-at "'\\([^']*\\)'") 403 (if (looking-at "'\\([^']*\\)'")
396 (setq start-pos (match-beginning 0)) 404 (setq end-pos (match-end 0))
397 (error "XML: Attribute values must be given between quotes"))) 405 (error "XML: Attribute values must be given between quotes")))
398 406
399 ;; Each attribute must be unique within a given element 407 ;; Each attribute must be unique within a given element
400 (if (assoc name attlist) 408 (if (assoc name attlist)
401 (error "XML: each attribute must be unique within an element")) 409 (error "XML: each attribute must be unique within an element"))
405 (let ((string (match-string 1)) 413 (let ((string (match-string 1))
406 (pos 0)) 414 (pos 0))
407 (replace-regexp-in-string "\\s-\\{2,\\}" " " string) 415 (replace-regexp-in-string "\\s-\\{2,\\}" " " string)
408 (push (cons name (xml-substitute-special string)) attlist)) 416 (push (cons name (xml-substitute-special string)) attlist))
409 417
410 (goto-char start-pos) 418 (goto-char end-pos)
411 (forward-sexp) ; we have string syntax
412
413 (skip-syntax-forward " ")) 419 (skip-syntax-forward " "))
414 (nreverse attlist))) 420 (nreverse attlist)))
415 421
416 ;;******************************************************************* 422 ;;*******************************************************************
417 ;;** 423 ;;**
488 494
489 ;; Translation of rule [45] of XML specifications 495 ;; Translation of rule [45] of XML specifications
490 ((looking-at 496 ((looking-at
491 "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") 497 "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
492 498
493 (setq element (intern (match-string 1)) 499 (setq element (match-string 1)
494 type (match-string-no-properties 2)) 500 type (match-string-no-properties 2))
495 (setq end-pos (match-end 0)) 501 (setq end-pos (match-end 0))
496 502
497 ;; Translation of rule [46] of XML specifications 503 ;; Translation of rule [46] of XML specifications
498 (cond 504 (cond
508 (error "XML: Invalid element type in the DTD"))) 514 (error "XML: Invalid element type in the DTD")))
509 515
510 ;; rule [45]: the element declaration must be unique 516 ;; rule [45]: the element declaration must be unique
511 (if (assoc element dtd) 517 (if (assoc element dtd)
512 (error "XML: element declarations must be unique in a DTD (<%s>)" 518 (error "XML: element declarations must be unique in a DTD (<%s>)"
513 (symbol-name element))) 519 element))
514 520
515 ;; Store the element in the DTD 521 ;; Store the element in the DTD
516 (push (list element type) dtd) 522 (push (list element type) dtd)
517 (goto-char end-pos)) 523 (goto-char end-pos))
518 ((looking-at "<!--") 524 ((looking-at "<!--")
522 (error "XML: Invalid DTD item"))) 528 (error "XML: Invalid DTD item")))
523 529
524 ;; Skip the end of the DTD 530 ;; Skip the end of the DTD
525 (search-forward ">")))) 531 (search-forward ">"))))
526 (nreverse dtd))) 532 (nreverse dtd)))
527
528 533
529 (defun xml-parse-elem-type (string) 534 (defun xml-parse-elem-type (string)
530 "Convert element type STRING into a Lisp structure." 535 "Convert element type STRING into a Lisp structure."
531 536
532 (let (elem modifier) 537 (let (elem modifier)
641 (insert ?\n indent-string 646 (insert ?\n indent-string
642 ?< ?/ (symbol-name (xml-node-name xml)) ?>))) 647 ?< ?/ (symbol-name (xml-node-name xml)) ?>)))
643 648
644 (provide 'xml) 649 (provide 'xml)
645 650
651 ;;; arch-tag: 5864b283-5a68-4b59-a20d-36a72b353b9b
646 ;;; xml.el ends here 652 ;;; xml.el ends here