Mercurial > emacs
comparison lisp/xml.el @ 54877:1bf7ef48f54f
(xml-maybe-do-ns): New function to handle namespace
parsing of both attribute and element names.
(xml-ns-parse-ns-attrs, xml-ns-expand-el, xml-ns-expand-attr)
(xml-intern-attrlist): Remove in favor of xml-maybe-do-ns.
(xml-parse-tag): Update assumed namespaces. Clean up namespace parsing.
(xml-parse-attlist): Make it do its own namespace parsing.
author | Stefan Monnier <monnier@iro.umontreal.ca> |
---|---|
date | Wed, 14 Apr 2004 18:36:14 +0000 |
parents | 586ffda6e9f9 |
children | 9204ad91984c |
comparison
equal
deleted
inserted
replaced
54876:95ee18354a3a | 54877:1bf7ef48f54f |
---|---|
50 ;; | 50 ;; |
51 ;; There can be only top level node, but with any number of children below. | 51 ;; There can be only top level node, but with any number of children below. |
52 | 52 |
53 ;;; LIST FORMAT | 53 ;;; LIST FORMAT |
54 | 54 |
55 ;; The functions `xml-parse-file' and `xml-parse-tag' return a list with | 55 ;; The functions `xml-parse-file', `xml-parse-region' and |
56 ;; the following format: | 56 ;; `xml-parse-tag' return a list with the following format: |
57 ;; | 57 ;; |
58 ;; xml-list ::= (node node ...) | 58 ;; xml-list ::= (node node ...) |
59 ;; node ::= (tag_name attribute-list . child_node_list) | 59 ;; node ::= (qname attribute-list . child_node_list) |
60 ;; child_node_list ::= child_node child_node ... | 60 ;; child_node_list ::= child_node child_node ... |
61 ;; child_node ::= node | string | 61 ;; child_node ::= node | string |
62 ;; tag_name ::= string | 62 ;; qname ::= (:namespace-uri . "name") | "name" |
63 ;; attribute_list ::= (("attribute" . "value") ("attribute" . "value") ...) | 63 ;; attribute_list ::= ((qname . "value") (qname . "value") ...) |
64 ;; | nil | 64 ;; | nil |
65 ;; string ::= "..." | 65 ;; string ::= "..." |
66 ;; | 66 ;; |
67 ;; Some macros are provided to ease the parsing of this list. | 67 ;; Some macros are provided to ease the parsing of this list. |
68 ;; Whitespace is preserved. Fixme: There should be a tree-walker that | 68 ;; Whitespace is preserved. Fixme: There should be a tree-walker that |
69 ;; can remove it. | 69 ;; can remove it. |
70 | |
71 ;; TODO: | |
72 ;; * xml:base, xml:space support | |
73 ;; * more complete DOCTYPE parsing | |
74 ;; * pi support | |
70 | 75 |
71 ;;; Code: | 76 ;;; Code: |
72 | 77 |
73 ;; Note that {buffer-substring,match-string}-no-properties were | 78 ;; Note that {buffer-substring,match-string}-no-properties were |
74 ;; formerly used in several places, but that removes composition info. | 79 ;; formerly used in several places, but that removes composition info. |
228 (goto-char (point-max)))) | 233 (goto-char (point-max)))) |
229 (if parse-dtd | 234 (if parse-dtd |
230 (cons dtd (nreverse xml)) | 235 (cons dtd (nreverse xml)) |
231 (nreverse xml))))))) | 236 (nreverse xml))))))) |
232 | 237 |
233 (defun xml-ns-parse-ns-attrs (attr-list &optional xml-ns) | 238 (defun xml-maybe-do-ns (name default xml-ns) |
234 "Parse the namespace attributes and return a list of cons in the form: | 239 "Perform any namespace expansion. NAME is the name to perform the expansion on. |
235 \(namespace . prefix)" | 240 DEFAULT is the default namespace. XML-NS is a cons of namespace |
236 | 241 names to uris. When namespace-aware parsing is off, then XML-NS |
237 (mapcar | 242 is nil. |
238 (lambda (attr) | 243 |
239 (let* ((splitup (split-string (car attr) ":")) | 244 During namespace-aware parsing, any name without a namespace is |
240 (prefix (nth 0 splitup)) | 245 put into the namespace identified by DEFAULT. nil is used to |
241 (lname (nth 1 splitup))) | 246 specify that the name shouldn't be given a namespace." |
242 (when (string= "xmlns" prefix) | 247 (if (consp xml-ns) |
243 (push (cons (if lname | 248 (let* ((nsp (string-match ":" name)) |
244 lname | 249 (lname (if nsp (substring name (match-end 0)) name)) |
245 "") | 250 (prefix (if nsp (substring name 0 (match-beginning 0)) default)) |
246 (cdr attr)) | 251 (special (and (string-equal lname "xmlns") (not prefix))) |
247 xml-ns)))) attr-list) | 252 ;; Setting default to nil will insure that there is not |
248 xml-ns) | 253 ;; matching cons in xml-ns. In which case we |
249 | 254 (ns (or (cdr (assoc (if special "xmlns" prefix) |
250 ;; expand element names | 255 xml-ns)) |
251 (defun xml-ns-expand-el (el xml-ns) | 256 :))) |
252 "Expand the XML elements from \"prefix:local-name\" to a cons in the form | 257 (cons ns (if special "" lname))) |
253 \"(namespace . local-name)\"." | 258 (intern name))) |
254 | |
255 (let* ((splitup (split-string el ":")) | |
256 (lname (or (nth 1 splitup) | |
257 (nth 0 splitup))) | |
258 (prefix (if (nth 1 splitup) | |
259 (nth 0 splitup) | |
260 (if (string= lname "xmlns") | |
261 "xmlns" | |
262 ""))) | |
263 (ns (cdr (assoc-string prefix xml-ns)))) | |
264 (if (string= "" ns) | |
265 lname | |
266 (cons (intern (concat ":" ns)) | |
267 lname)))) | |
268 | |
269 ;; expand attribute names | |
270 (defun xml-ns-expand-attr (attr-list xml-ns) | |
271 "Expand the attribute list for a particular element from the form | |
272 \"prefix:local-name\" to the form \"{namespace}:local-name\"." | |
273 | |
274 (mapcar | |
275 (lambda (attr) | |
276 (let* ((splitup (split-string (car attr) ":")) | |
277 (lname (or (nth 1 splitup) | |
278 (nth 0 splitup))) | |
279 (prefix (if (nth 1 splitup) | |
280 (nth 0 splitup) | |
281 (if (string= (car attr) "xmlns") | |
282 "xmlns" | |
283 ""))) | |
284 (ns (cdr (assoc-string prefix xml-ns)))) | |
285 (setcar attr | |
286 (if (string= "" ns) | |
287 lname | |
288 (cons (intern (concat ":" ns)) | |
289 lname))))) | |
290 attr-list) | |
291 attr-list) | |
292 | |
293 (defun xml-intern-attrlist (attr-list) | |
294 "Convert attribute names to symbols for backward compatibility." | |
295 (mapcar (lambda (attr) | |
296 (setcar attr (intern (car attr)))) | |
297 attr-list) | |
298 attr-list) | |
299 | 259 |
300 (defun xml-parse-tag (&optional parse-dtd parse-ns) | 260 (defun xml-parse-tag (&optional parse-dtd parse-ns) |
301 "Parse the tag at point. | 261 "Parse the tag at point. |
302 If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and | 262 If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and |
303 returned as the first element in the list. | 263 returned as the first element in the list. |
308 - a pair : the first element is the DTD, the second is the node." | 268 - a pair : the first element is the DTD, the second is the node." |
309 (let ((xml-ns (if (consp parse-ns) | 269 (let ((xml-ns (if (consp parse-ns) |
310 parse-ns | 270 parse-ns |
311 (if parse-ns | 271 (if parse-ns |
312 (list | 272 (list |
313 ;; Default no namespace | 273 ;; Default for empty prefix is no namespace |
314 (cons "" "") | 274 (cons "" :) |
275 ;; "xml" namespace | |
276 (cons "xml" :http://www.w3.org/XML/1998/namespace) | |
315 ;; We need to seed the xmlns namespace | 277 ;; We need to seed the xmlns namespace |
316 (cons "xmlns" "http://www.w3.org/2000/xmlns/")))))) | 278 (cons "xmlns" :http://www.w3.org/2000/xmlns/)))))) |
317 (cond | 279 (cond |
318 ;; Processing instructions (like the <?xml version="1.0"?> tag at the | 280 ;; Processing instructions (like the <?xml version="1.0"?> tag at the |
319 ;; beginning of a document). | 281 ;; beginning of a document). |
320 ((looking-at "<\\?") | 282 ((looking-at "<\\?") |
321 (search-forward "?>") | 283 (search-forward "?>") |
348 ((looking-at "<\\([^/>[:space:]]+\\)") | 310 ((looking-at "<\\([^/>[:space:]]+\\)") |
349 (goto-char (match-end 1)) | 311 (goto-char (match-end 1)) |
350 | 312 |
351 ;; Parse this node | 313 ;; Parse this node |
352 (let* ((node-name (match-string 1)) | 314 (let* ((node-name (match-string 1)) |
353 (attr-list (xml-parse-attlist)) | 315 ;; Parse the attribute list. |
354 (children (if (consp xml-ns) ;; take care of namespace parsing | 316 (attrs (xml-parse-attlist xml-ns)) |
355 (progn | 317 children pos) |
356 (setq xml-ns (xml-ns-parse-ns-attrs | 318 |
357 attr-list xml-ns)) | 319 ;; add the xmlns:* attrs to our cache |
358 (list (xml-ns-expand-attr | 320 (when (consp xml-ns) |
359 attr-list xml-ns) | 321 (dolist (attr attrs) |
360 (xml-ns-expand-el | 322 (when (and (consp (car attr)) |
361 node-name xml-ns))) | 323 (eq :http://www.w3.org/2000/xmlns/ |
362 (list (xml-intern-attrlist attr-list) | 324 (caar attr))) |
363 (intern node-name)))) | 325 (push (cons (cdar attr) (intern (concat ":" (cdr attr)))) |
364 pos) | 326 xml-ns)))) |
365 | 327 |
328 ;; expand element names | |
329 (setq node-name (list (xml-maybe-do-ns node-name "" xml-ns))) | |
330 | |
331 (setq children (list attrs node-name)) | |
366 ;; is this an empty element ? | 332 ;; is this an empty element ? |
367 (if (looking-at "/>") | 333 (if (looking-at "/>") |
368 (progn | 334 (progn |
369 (forward-char 2) | 335 (forward-char 2) |
370 (nreverse children)) | 336 (nreverse children)) |
414 ;; This was an invalid start tag | 380 ;; This was an invalid start tag |
415 (error "XML: Invalid attribute list"))))) | 381 (error "XML: Invalid attribute list"))))) |
416 (t ;; This is not a tag. | 382 (t ;; This is not a tag. |
417 (error "XML: Invalid character"))))) | 383 (error "XML: Invalid character"))))) |
418 | 384 |
419 (defun xml-parse-attlist () | 385 (defun xml-parse-attlist (&optional xml-ns) |
420 "Return the attribute-list after point. Leave point at the | 386 "Return the attribute-list after point. Leave point at the |
421 first non-blank character after the tag." | 387 first non-blank character after the tag." |
422 (let ((attlist ()) | 388 (let ((attlist ()) |
423 end-pos name) | 389 end-pos name) |
424 (skip-syntax-forward " ") | 390 (skip-syntax-forward " ") |
425 (while (looking-at (eval-when-compile | 391 (while (looking-at (eval-when-compile |
426 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) | 392 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) |
427 (setq name (match-string 1)) | 393 (setq end-pos (match-end 0)) |
428 (goto-char (match-end 0)) | 394 (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns)) |
395 (goto-char end-pos) | |
429 | 396 |
430 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize | 397 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize |
431 | 398 |
432 ;; Do we have a string between quotes (or double-quotes), | 399 ;; Do we have a string between quotes (or double-quotes), |
433 ;; or a simple word ? | 400 ;; or a simple word ? |