Mercurial > emacs
comparison lisp/xml.el @ 89909:68c22ea6027c
Sync to HEAD
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 16 Apr 2004 12:51:06 +0000 |
parents | 375f2633d815 |
children | 4c90ffeb71c5 |
comparison
equal
deleted
inserted
replaced
89908:ee1402f7b568 | 89909:68c22ea6027c |
---|---|
1 ;;; xml.el --- XML parser | 1 ;;; xml.el --- XML parser |
2 | 2 |
3 ;; Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc. | 3 ;; Copyright (C) 2000, 01, 03, 2004 Free Software Foundation, Inc. |
4 | 4 |
5 ;; Author: Emmanuel Briot <briot@gnat.com> | 5 ;; Author: Emmanuel Briot <briot@gnat.com> |
6 ;; Maintainer: Mark A. Hershberger <mah@everybody.org> | 6 ;; Maintainer: Mark A. Hershberger <mah@everybody.org> |
7 ;; Keywords: xml, data | 7 ;; Keywords: xml, data |
8 | 8 |
50 ;; | 50 ;; |
51 ;; There can be only top level node, but with any number of children below. | 51 ;; There can be only top level node, but with any number of children below. |
52 | 52 |
53 ;;; LIST FORMAT | 53 ;;; LIST FORMAT |
54 | 54 |
55 ;; The functions `xml-parse-file' and `xml-parse-tag' return a list with | 55 ;; The functions `xml-parse-file', `xml-parse-region' and |
56 ;; the following format: | 56 ;; `xml-parse-tag' return a list with the following format: |
57 ;; | 57 ;; |
58 ;; xml-list ::= (node node ...) | 58 ;; xml-list ::= (node node ...) |
59 ;; node ::= (tag_name attribute-list . child_node_list) | 59 ;; node ::= (qname attribute-list . child_node_list) |
60 ;; child_node_list ::= child_node child_node ... | 60 ;; child_node_list ::= child_node child_node ... |
61 ;; child_node ::= node | string | 61 ;; child_node ::= node | string |
62 ;; tag_name ::= string | 62 ;; qname ::= (:namespace-uri . "name") | "name" |
63 ;; attribute_list ::= (("attribute" . "value") ("attribute" . "value") ...) | 63 ;; attribute_list ::= ((qname . "value") (qname . "value") ...) |
64 ;; | nil | 64 ;; | nil |
65 ;; string ::= "..." | 65 ;; string ::= "..." |
66 ;; | 66 ;; |
67 ;; Some macros are provided to ease the parsing of this list. | 67 ;; Some macros are provided to ease the parsing of this list. |
68 ;; Whitespace is preserved. Fixme: There should be a tree-walker that | 68 ;; Whitespace is preserved. Fixme: There should be a tree-walker that |
69 ;; can remove it. | 69 ;; can remove it. |
70 | |
71 ;; TODO: | |
72 ;; * xml:base, xml:space support | |
73 ;; * more complete DOCTYPE parsing | |
74 ;; * pi support | |
70 | 75 |
71 ;;; Code: | 76 ;;; Code: |
72 | 77 |
73 ;; Note that {buffer-substring,match-string}-no-properties were | 78 ;; Note that {buffer-substring,match-string}-no-properties were |
74 ;; formerly used in several places, but that removes composition info. | 79 ;; formerly used in several places, but that removes composition info. |
102 (if child | 107 (if child |
103 (if (equal (xml-node-name child) child-name) | 108 (if (equal (xml-node-name child) child-name) |
104 (push child match)))) | 109 (push child match)))) |
105 (nreverse match))) | 110 (nreverse match))) |
106 | 111 |
107 (defun xml-get-attribute (node attribute) | 112 (defun xml-get-attribute-or-nil (node attribute) |
108 "Get from NODE the value of ATTRIBUTE. | 113 "Get from NODE the value of ATTRIBUTE. |
109 An empty string is returned if the attribute was not found." | 114 Return `nil' if the attribute was not found. |
110 (if (xml-node-attributes node) | 115 |
111 (let ((value (assoc attribute (xml-node-attributes node)))) | 116 See also `xml-get-attribute'." |
112 (if value | 117 (cdr (assoc attribute (xml-node-attributes node)))) |
113 (cdr value) | 118 |
114 "")) | 119 (defsubst xml-get-attribute (node attribute) |
115 "")) | 120 "Get from NODE the value of ATTRIBUTE. |
121 An empty string is returned if the attribute was not found. | |
122 | |
123 See also `xml-get-attribute-or-nil'." | |
124 (or (xml-get-attribute-or-nil node attribute) "")) | |
116 | 125 |
117 ;;******************************************************************* | 126 ;;******************************************************************* |
118 ;;** | 127 ;;** |
119 ;;** Creating the list | 128 ;;** Creating the list |
120 ;;** | 129 ;;** |
206 (goto-char (point-min)) | 215 (goto-char (point-min)) |
207 (while (not (eobp)) | 216 (while (not (eobp)) |
208 (if (search-forward "<" nil t) | 217 (if (search-forward "<" nil t) |
209 (progn | 218 (progn |
210 (forward-char -1) | 219 (forward-char -1) |
211 (if xml | 220 (setq result (xml-parse-tag parse-dtd parse-ns)) |
221 (if (and xml result) | |
212 ;; translation of rule [1] of XML specifications | 222 ;; translation of rule [1] of XML specifications |
213 (error "XML files can have only one toplevel tag") | 223 (error "XML files can have only one toplevel tag") |
214 (setq result (xml-parse-tag parse-dtd parse-ns)) | |
215 (cond | 224 (cond |
216 ((null result)) | 225 ((null result)) |
217 ((listp (car result)) | 226 ((and (listp (car result)) |
227 parse-dtd) | |
218 (setq dtd (car result)) | 228 (setq dtd (car result)) |
219 (if (cdr result) ; possible leading comment | 229 (if (cdr result) ; possible leading comment |
220 (add-to-list 'xml (cdr result)))) | 230 (add-to-list 'xml (cdr result)))) |
221 (t | 231 (t |
222 (add-to-list 'xml result))))) | 232 (add-to-list 'xml result))))) |
223 (goto-char (point-max)))) | 233 (goto-char (point-max)))) |
224 (if parse-dtd | 234 (if parse-dtd |
225 (cons dtd (nreverse xml)) | 235 (cons dtd (nreverse xml)) |
226 (nreverse xml))))))) | 236 (nreverse xml))))))) |
227 | 237 |
238 (defun xml-maybe-do-ns (name default xml-ns) | |
239 "Perform any namespace expansion. NAME is the name to perform the expansion on. | |
240 DEFAULT is the default namespace. XML-NS is a cons of namespace | |
241 names to uris. When namespace-aware parsing is off, then XML-NS | |
242 is nil. | |
243 | |
244 During namespace-aware parsing, any name without a namespace is | |
245 put into the namespace identified by DEFAULT. nil is used to | |
246 specify that the name shouldn't be given a namespace." | |
247 (if (consp xml-ns) | |
248 (let* ((nsp (string-match ":" name)) | |
249 (lname (if nsp (substring name (match-end 0)) name)) | |
250 (prefix (if nsp (substring name 0 (match-beginning 0)) default)) | |
251 (special (and (string-equal lname "xmlns") (not prefix))) | |
252 ;; Setting default to nil will insure that there is not | |
253 ;; matching cons in xml-ns. In which case we | |
254 (ns (or (cdr (assoc (if special "xmlns" prefix) | |
255 xml-ns)) | |
256 :))) | |
257 (cons ns (if special "" lname))) | |
258 (intern name))) | |
228 | 259 |
229 (defun xml-parse-tag (&optional parse-dtd parse-ns) | 260 (defun xml-parse-tag (&optional parse-dtd parse-ns) |
230 "Parse the tag at point. | 261 "Parse the tag at point. |
231 If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and | 262 If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and |
232 returned as the first element in the list. | 263 returned as the first element in the list. |
237 - a pair : the first element is the DTD, the second is the node." | 268 - a pair : the first element is the DTD, the second is the node." |
238 (let ((xml-ns (if (consp parse-ns) | 269 (let ((xml-ns (if (consp parse-ns) |
239 parse-ns | 270 parse-ns |
240 (if parse-ns | 271 (if parse-ns |
241 (list | 272 (list |
242 ;; Default no namespace | 273 ;; Default for empty prefix is no namespace |
243 (cons "" "") | 274 (cons "" :) |
275 ;; "xml" namespace | |
276 (cons "xml" :http://www.w3.org/XML/1998/namespace) | |
244 ;; We need to seed the xmlns namespace | 277 ;; We need to seed the xmlns namespace |
245 (cons "xmlns" "http://www.w3.org/2000/xmlns/")))))) | 278 (cons "xmlns" :http://www.w3.org/2000/xmlns/)))))) |
246 (cond | 279 (cond |
247 ;; Processing instructions (like the <?xml version="1.0"?> tag at the | 280 ;; Processing instructions (like the <?xml version="1.0"?> tag at the |
248 ;; beginning of a document). | 281 ;; beginning of a document). |
249 ((looking-at "<\\?") | 282 ((looking-at "<\\?") |
250 (search-forward "?>") | 283 (search-forward "?>") |
274 ((looking-at "</") | 307 ((looking-at "</") |
275 '()) | 308 '()) |
276 ;; opening tag | 309 ;; opening tag |
277 ((looking-at "<\\([^/>[:space:]]+\\)") | 310 ((looking-at "<\\([^/>[:space:]]+\\)") |
278 (goto-char (match-end 1)) | 311 (goto-char (match-end 1)) |
312 | |
313 ;; Parse this node | |
279 (let* ((node-name (match-string 1)) | 314 (let* ((node-name (match-string 1)) |
280 ;; Parse the attribute list. | 315 ;; Parse the attribute list. |
281 (children (list (xml-parse-attlist) (intern node-name))) | 316 (attrs (xml-parse-attlist xml-ns)) |
282 pos) | 317 children pos) |
283 | 318 |
284 ;; add the xmlns:* attrs to our cache | 319 ;; add the xmlns:* attrs to our cache |
285 (when (consp xml-ns) | 320 (when (consp xml-ns) |
286 (mapcar | 321 (dolist (attr attrs) |
287 (lambda (attr) | 322 (when (and (consp (car attr)) |
288 (let* ((splitup (split-string (symbol-name (car attr)) ":")) | 323 (eq :http://www.w3.org/2000/xmlns/ |
289 (prefix (nth 0 splitup)) | 324 (caar attr))) |
290 (lname (nth 1 splitup))) | 325 (push (cons (cdar attr) (intern (concat ":" (cdr attr)))) |
291 (when (string= "xmlns" prefix) | 326 xml-ns)))) |
292 (setq xml-ns (append (list (cons (if lname | 327 |
293 lname | 328 ;; expand element names |
294 "") | 329 (setq node-name (list (xml-maybe-do-ns node-name "" xml-ns))) |
295 (cdr attr))) | 330 |
296 xml-ns))))) | 331 (setq children (list attrs node-name)) |
297 (car children)) | |
298 | |
299 ;; expand element names | |
300 (let* ((splitup (split-string (symbol-name (cadr children)) ":")) | |
301 (lname (or (nth 1 splitup) | |
302 (nth 0 splitup))) | |
303 (prefix (if (nth 1 splitup) | |
304 (nth 0 splitup) | |
305 ""))) | |
306 (setcdr children (list | |
307 (intern (concat "{" | |
308 (cdr (assoc-string prefix xml-ns)) | |
309 "}" lname))))) | |
310 | |
311 ;; expand attribute names | |
312 (mapcar | |
313 (lambda (attr) | |
314 (let* ((splitup (split-string (symbol-name (car attr)) ":")) | |
315 (lname (or (nth 1 splitup) | |
316 (nth 0 splitup))) | |
317 (prefix (if (nth 1 splitup) | |
318 (nth 0 splitup) | |
319 (caar xml-ns)))) | |
320 | |
321 (setcar attr (intern (concat "{" | |
322 (cdr (assoc-string prefix xml-ns)) | |
323 "}" lname))))) | |
324 (car children))) | |
325 | |
326 ;; is this an empty element ? | 332 ;; is this an empty element ? |
327 (if (looking-at "/>") | 333 (if (looking-at "/>") |
328 (progn | 334 (progn |
329 (forward-char 2) | 335 (forward-char 2) |
330 (nreverse children)) | 336 (nreverse children)) |
374 ;; This was an invalid start tag | 380 ;; This was an invalid start tag |
375 (error "XML: Invalid attribute list"))))) | 381 (error "XML: Invalid attribute list"))))) |
376 (t ;; This is not a tag. | 382 (t ;; This is not a tag. |
377 (error "XML: Invalid character"))))) | 383 (error "XML: Invalid character"))))) |
378 | 384 |
379 (defun xml-parse-attlist () | 385 (defun xml-parse-attlist (&optional xml-ns) |
380 "Return the attribute-list after point.Leave point at the first non-blank character after the tag." | 386 "Return the attribute-list after point. Leave point at the |
387 first non-blank character after the tag." | |
381 (let ((attlist ()) | 388 (let ((attlist ()) |
382 start-pos name) | 389 end-pos name) |
383 (skip-syntax-forward " ") | 390 (skip-syntax-forward " ") |
384 (while (looking-at (eval-when-compile | 391 (while (looking-at (eval-when-compile |
385 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) | 392 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) |
386 (setq name (intern (match-string 1))) | 393 (setq end-pos (match-end 0)) |
387 (goto-char (match-end 0)) | 394 (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns)) |
395 (goto-char end-pos) | |
388 | 396 |
389 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize | 397 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize |
390 | 398 |
391 ;; Do we have a string between quotes (or double-quotes), | 399 ;; Do we have a string between quotes (or double-quotes), |
392 ;; or a simple word ? | 400 ;; or a simple word ? |
393 (if (looking-at "\"\\([^\"]*\\)\"") | 401 (if (looking-at "\"\\([^\"]*\\)\"") |
394 (setq start-pos (match-beginning 0)) | 402 (setq end-pos (match-end 0)) |
395 (if (looking-at "'\\([^']*\\)'") | 403 (if (looking-at "'\\([^']*\\)'") |
396 (setq start-pos (match-beginning 0)) | 404 (setq end-pos (match-end 0)) |
397 (error "XML: Attribute values must be given between quotes"))) | 405 (error "XML: Attribute values must be given between quotes"))) |
398 | 406 |
399 ;; Each attribute must be unique within a given element | 407 ;; Each attribute must be unique within a given element |
400 (if (assoc name attlist) | 408 (if (assoc name attlist) |
401 (error "XML: each attribute must be unique within an element")) | 409 (error "XML: each attribute must be unique within an element")) |
405 (let ((string (match-string 1)) | 413 (let ((string (match-string 1)) |
406 (pos 0)) | 414 (pos 0)) |
407 (replace-regexp-in-string "\\s-\\{2,\\}" " " string) | 415 (replace-regexp-in-string "\\s-\\{2,\\}" " " string) |
408 (push (cons name (xml-substitute-special string)) attlist)) | 416 (push (cons name (xml-substitute-special string)) attlist)) |
409 | 417 |
410 (goto-char start-pos) | 418 (goto-char end-pos) |
411 (forward-sexp) ; we have string syntax | |
412 | |
413 (skip-syntax-forward " ")) | 419 (skip-syntax-forward " ")) |
414 (nreverse attlist))) | 420 (nreverse attlist))) |
415 | 421 |
416 ;;******************************************************************* | 422 ;;******************************************************************* |
417 ;;** | 423 ;;** |
488 | 494 |
489 ;; Translation of rule [45] of XML specifications | 495 ;; Translation of rule [45] of XML specifications |
490 ((looking-at | 496 ((looking-at |
491 "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") | 497 "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") |
492 | 498 |
493 (setq element (intern (match-string 1)) | 499 (setq element (match-string 1) |
494 type (match-string-no-properties 2)) | 500 type (match-string-no-properties 2)) |
495 (setq end-pos (match-end 0)) | 501 (setq end-pos (match-end 0)) |
496 | 502 |
497 ;; Translation of rule [46] of XML specifications | 503 ;; Translation of rule [46] of XML specifications |
498 (cond | 504 (cond |
508 (error "XML: Invalid element type in the DTD"))) | 514 (error "XML: Invalid element type in the DTD"))) |
509 | 515 |
510 ;; rule [45]: the element declaration must be unique | 516 ;; rule [45]: the element declaration must be unique |
511 (if (assoc element dtd) | 517 (if (assoc element dtd) |
512 (error "XML: element declarations must be unique in a DTD (<%s>)" | 518 (error "XML: element declarations must be unique in a DTD (<%s>)" |
513 (symbol-name element))) | 519 element)) |
514 | 520 |
515 ;; Store the element in the DTD | 521 ;; Store the element in the DTD |
516 (push (list element type) dtd) | 522 (push (list element type) dtd) |
517 (goto-char end-pos)) | 523 (goto-char end-pos)) |
518 ((looking-at "<!--") | 524 ((looking-at "<!--") |
522 (error "XML: Invalid DTD item"))) | 528 (error "XML: Invalid DTD item"))) |
523 | 529 |
524 ;; Skip the end of the DTD | 530 ;; Skip the end of the DTD |
525 (search-forward ">")))) | 531 (search-forward ">")))) |
526 (nreverse dtd))) | 532 (nreverse dtd))) |
527 | |
528 | 533 |
529 (defun xml-parse-elem-type (string) | 534 (defun xml-parse-elem-type (string) |
530 "Convert element type STRING into a Lisp structure." | 535 "Convert element type STRING into a Lisp structure." |
531 | 536 |
532 (let (elem modifier) | 537 (let (elem modifier) |
641 (insert ?\n indent-string | 646 (insert ?\n indent-string |
642 ?< ?/ (symbol-name (xml-node-name xml)) ?>))) | 647 ?< ?/ (symbol-name (xml-node-name xml)) ?>))) |
643 | 648 |
644 (provide 'xml) | 649 (provide 'xml) |
645 | 650 |
651 ;;; arch-tag: 5864b283-5a68-4b59-a20d-36a72b353b9b | |
646 ;;; xml.el ends here | 652 ;;; xml.el ends here |