comparison lisp/xml.el @ 42031:54db4085a7df

Use setq rather than (set 'foo bar). Use push+nreverse rather than append. (xml-node-name, xml-node-attributes, xml-node-children): Use defsubst rather than macros. (xml-parse-region): Handle a nil return value from xml-parse-tag. (xml-parse-tag): Don't skip white space. Return nil for a comment. Concat the two strings surrounding a comment into a single string.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Fri, 14 Dec 2001 22:12:30 +0000
parents 7507bd185307
children 373960858ccc
comparison
equal deleted inserted replaced
42030:74a3864ffe9a 42031:54db4085a7df
71 ;;** 71 ;;**
72 ;;** Macros to parse the list 72 ;;** Macros to parse the list
73 ;;** 73 ;;**
74 ;;******************************************************************* 74 ;;*******************************************************************
75 75
76 (defmacro xml-node-name (node) 76 (defsubst xml-node-name (node)
77 "Return the tag associated with NODE. 77 "Return the tag associated with NODE.
78 The tag is a lower-case symbol." 78 The tag is a lower-case symbol."
79 (list 'car node)) 79 (car node))
80 80
81 (defmacro xml-node-attributes (node) 81 (defsubst xml-node-attributes (node)
82 "Return the list of attributes of NODE. 82 "Return the list of attributes of NODE.
83 The list can be nil." 83 The list can be nil."
84 (list 'nth 1 node)) 84 (nth 1 node))
85 85
86 (defmacro xml-node-children (node) 86 (defsubst xml-node-children (node)
87 "Return the list of children of NODE. 87 "Return the list of children of NODE.
88 This is a list of nodes, and it can be nil." 88 This is a list of nodes, and it can be nil."
89 (list 'cddr node)) 89 (cddr node))
90 90
91 (defun xml-get-children (node child-name) 91 (defun xml-get-children (node child-name)
92 "Return the children of NODE whose tag is CHILD-NAME. 92 "Return the children of NODE whose tag is CHILD-NAME.
93 CHILD-NAME should be a lower case symbol." 93 CHILD-NAME should be a lower case symbol."
94 (let ((children (xml-node-children node)) 94 (let ((match ()))
95 match) 95 (dolist (child (xml-node-children node))
96 (while children 96 (if child
97 (if (car children) 97 (if (equal (xml-node-name child) child-name)
98 (if (equal (xml-node-name (car children)) child-name) 98 (push child match))))
99 (set 'match (append match (list (car children)))))) 99 (nreverse match)))
100 (set 'children (cdr children)))
101 match))
102 100
103 (defun xml-get-attribute (node attribute) 101 (defun xml-get-attribute (node attribute)
104 "Get from NODE the value of ATTRIBUTE. 102 "Get from NODE the value of ATTRIBUTE.
105 An empty string is returned if the attribute was not found." 103 An empty string is returned if the attribute was not found."
106 (if (xml-node-attributes node) 104 (if (xml-node-attributes node)
153 (if (search-forward "<" end t) 151 (if (search-forward "<" end t)
154 (progn 152 (progn
155 (forward-char -1) 153 (forward-char -1)
156 (if (null xml) 154 (if (null xml)
157 (progn 155 (progn
158 (set 'result (xml-parse-tag end parse-dtd)) 156 (setq result (xml-parse-tag end parse-dtd))
159 (cond 157 (cond
158 ((null result))
160 ((listp (car result)) 159 ((listp (car result))
161 (set 'dtd (car result)) 160 (setq dtd (car result))
162 (add-to-list 'xml (cdr result))) 161 (add-to-list 'xml (cdr result)))
163 (t 162 (t
164 (add-to-list 'xml result)))) 163 (add-to-list 'xml result))))
165 164
166 ;; translation of rule [1] of XML specifications 165 ;; translation of rule [1] of XML specifications
195 (buffer-substring-no-properties pos (match-beginning 0)))) 194 (buffer-substring-no-properties pos (match-beginning 0))))
196 ;; DTD for the document 195 ;; DTD for the document
197 ((looking-at "<!DOCTYPE") 196 ((looking-at "<!DOCTYPE")
198 (let (dtd) 197 (let (dtd)
199 (if parse-dtd 198 (if parse-dtd
200 (set 'dtd (xml-parse-dtd end)) 199 (setq dtd (xml-parse-dtd end))
201 (xml-skip-dtd end)) 200 (xml-skip-dtd end))
202 (skip-chars-forward " \t\n") 201 (skip-chars-forward " \t\n")
203 (if dtd 202 (if dtd
204 (cons dtd (xml-parse-tag end)) 203 (cons dtd (xml-parse-tag end))
205 (xml-parse-tag end)))) 204 (xml-parse-tag end))))
206 ;; skip comments 205 ;; skip comments
207 ((looking-at "<!--") 206 ((looking-at "<!--")
208 (search-forward "-->" end) 207 (search-forward "-->" end)
209 (skip-chars-forward " \t\n") 208 nil)
210 (xml-parse-tag end))
211 ;; end tag 209 ;; end tag
212 ((looking-at "</") 210 ((looking-at "</")
213 '()) 211 '())
214 ;; opening tag 212 ;; opening tag
215 ((looking-at "<\\([^/> \t\n]+\\)") 213 ((looking-at "<\\([^/> \t\n]+\\)")
216 (let* ((node-name (match-string 1)) 214 (goto-char (match-end 1))
217 (children (list (intern node-name))) 215 (let* ((case-fold-search nil) ;; XML is case-sensitive.
218 (case-fold-search nil) ;; XML is case-sensitive 216 (node-name (match-string 1))
217 ;; Parse the attribute list.
218 (children (list (xml-parse-attlist end) (intern node-name)))
219 pos) 219 pos)
220 (goto-char (match-end 1))
221
222 ;; parses the attribute list
223 (set 'children (append children (list (xml-parse-attlist end))))
224 220
225 ;; is this an empty element ? 221 ;; is this an empty element ?
226 (if (looking-at "/>") 222 (if (looking-at "/>")
227 (progn 223 (progn
228 (forward-char 2) 224 (forward-char 2)
229 (skip-chars-forward " \t\n") 225 (nreverse (cons '("") children)))
230 (append children '("")))
231 226
232 ;; is this a valid start tag ? 227 ;; is this a valid start tag ?
233 (if (eq (char-after) ?>) 228 (if (eq (char-after) ?>)
234 (progn 229 (progn
235 (forward-char 1) 230 (forward-char 1)
236 (skip-chars-forward " \t\n") 231 ;; Now check that we have the right end-tag. Note that this
237 ;; Now check that we have the right end-tag. Note that this one might 232 ;; one might contain spaces after the tag name
238 ;; contain spaces after the tag name
239 (while (not (looking-at (concat "</" node-name "[ \t\n]*>"))) 233 (while (not (looking-at (concat "</" node-name "[ \t\n]*>")))
240 (cond 234 (cond
241 ((looking-at "</") 235 ((looking-at "</")
242 (error (concat 236 (error (concat
243 "XML: invalid syntax -- invalid end tag (expecting " 237 "XML: invalid syntax -- invalid end tag (expecting "
244 node-name 238 node-name
245 ") at pos " (number-to-string (point))))) 239 ") at pos " (number-to-string (point)))))
246 ((= (char-after) ?<) 240 ((= (char-after) ?<)
247 (set 'children (append children (list (xml-parse-tag end))))) 241 (let ((tag (xml-parse-tag end)))
242 (when tag
243 (push tag children))))
248 (t 244 (t
249 (set 'pos (point)) 245 (setq pos (point))
250 (search-forward "<" end) 246 (search-forward "<" end)
251 (forward-char -1) 247 (forward-char -1)
252 (let ((string (buffer-substring-no-properties pos (point))) 248 (let ((string (buffer-substring-no-properties pos (point)))
253 (pos 0)) 249 (pos 0))
254 250
255 ;; Clean up the string (no newline characters) 251 ;; Clean up the string (no newline characters)
256 ;; Not done, since as per XML specifications, the XML processor 252 ;; Not done, since as per XML specifications, the XML processor
257 ;; should always pass the whole string to the application. 253 ;; should always pass the whole string to the application.
258 ;; (while (string-match "\\s +" string pos) 254 ;; (while (string-match "\\s +" string pos)
259 ;; (set 'string (replace-match " " t t string)) 255 ;; (setq string (replace-match " " t t string))
260 ;; (set 'pos (1+ (match-beginning 0)))) 256 ;; (setq pos (1+ (match-beginning 0))))
261 257
262 (set 'children (append children 258 (setq string (xml-substitute-special string))
263 (list (xml-substitute-special string)))))))) 259 (setq children
260 (if (stringp (car children))
261 ;; The two strings were separated by a comment.
262 (cons (concat (car children) string)
263 (cdr children))
264 (cons string children)))))))
264 (goto-char (match-end 0)) 265 (goto-char (match-end 0))
265 (skip-chars-forward " \t\n")
266 (if (> (point) end) 266 (if (> (point) end)
267 (error "XML: End tag for %s not found before end of region" 267 (error "XML: End tag for %s not found before end of region"
268 node-name)) 268 node-name))
269 children 269 (nreverse children))
270 )
271 270
272 ;; This was an invalid start tag 271 ;; This was an invalid start tag
273 (error "XML: Invalid attribute list") 272 (error "XML: Invalid attribute list")
274 )))) 273 ))))
275 (t ;; This is not a tag. 274 (t ;; This is not a tag.
278 277
279 (defun xml-parse-attlist (end) 278 (defun xml-parse-attlist (end)
280 "Return the attribute-list that point is looking at. 279 "Return the attribute-list that point is looking at.
281 The search for attributes end at the position END in the current buffer. 280 The search for attributes end at the position END in the current buffer.
282 Leaves the point on the first non-blank character after the tag." 281 Leaves the point on the first non-blank character after the tag."
283 (let ((attlist '()) 282 (let ((attlist ())
284 name) 283 name)
285 (skip-chars-forward " \t\n") 284 (skip-chars-forward " \t\n")
286 (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[ \t\n]*=[ \t\n]*") 285 (while (looking-at "\\([a-zA-Z_:][-a-zA-Z0-9._:]*\\)[ \t\n]*=[ \t\n]*")
287 (set 'name (intern (match-string 1))) 286 (setq name (intern (match-string 1)))
288 (goto-char (match-end 0)) 287 (goto-char (match-end 0))
289 288
290 ;; Do we have a string between quotes (or double-quotes), 289 ;; Do we have a string between quotes (or double-quotes),
291 ;; or a simple word ? 290 ;; or a simple word ?
292 (unless (looking-at "\"\\([^\"]+\\)\"") 291 (unless (looking-at "\"\\([^\"]+\\)\"")
295 294
296 ;; Each attribute must be unique within a given element 295 ;; Each attribute must be unique within a given element
297 (if (assoc name attlist) 296 (if (assoc name attlist)
298 (error "XML: each attribute must be unique within an element")) 297 (error "XML: each attribute must be unique within an element"))
299 298
300 (set 'attlist (append attlist 299 (push (cons name (match-string-no-properties 1)) attlist)
301 (list (cons name (match-string-no-properties 1)))))
302 (goto-char (match-end 0)) 300 (goto-char (match-end 0))
303 (skip-chars-forward " \t\n") 301 (skip-chars-forward " \t\n")
304 (if (> (point) end) 302 (if (> (point) end)
305 (error "XML: end of attribute list not found before end of region")) 303 (error "XML: end of attribute list not found before end of region"))
306 ) 304 )
307 attlist 305 (nreverse attlist)))
308 ))
309 306
310 ;;******************************************************************* 307 ;;*******************************************************************
311 ;;** 308 ;;**
312 ;;** The DTD (document type declaration) 309 ;;** The DTD (document type declaration)
313 ;;** The following functions know how to skip or parse the DTD of 310 ;;** The following functions know how to skip or parse the DTD of
333 (error (error "XML: No end to the DTD")))) 330 (error (error "XML: No end to the DTD"))))
334 331
335 (defun xml-parse-dtd (end) 332 (defun xml-parse-dtd (end)
336 "Parse the DTD that point is looking at. 333 "Parse the DTD that point is looking at.
337 The DTD must end before the position END in the current buffer." 334 The DTD must end before the position END in the current buffer."
338 (let (dtd type element end-pos) 335 (forward-char (length "<!DOCTYPE"))
339 (forward-char (length "<!DOCTYPE")) 336 (skip-chars-forward " \t\n")
340 (skip-chars-forward " \t\n") 337 (if (looking-at ">")
341 (if (looking-at ">") 338 (error "XML: invalid DTD (excepting name of the document)"))
342 (error "XML: invalid DTD (excepting name of the document)")) 339
343 340 ;; Get the name of the document
344 ;; Get the name of the document 341 (looking-at "\\sw+")
345 (looking-at "\\sw+") 342 (let ((dtd (list (match-string-no-properties 0) 'dtd))
346 (set 'dtd (list 'dtd (match-string-no-properties 0))) 343 type element end-pos)
347 (goto-char (match-end 0)) 344 (goto-char (match-end 0))
348 345
349 (skip-chars-forward " \t\n") 346 (skip-chars-forward " \t\n")
350 347
351 ;; External DTDs => don't know how to handle them yet 348 ;; External DTDs => don't know how to handle them yet
365 ((looking-at 362 ((looking-at
366 "[\t \n]*<!ELEMENT[ \t\n]+\\([a-zA-Z0-9.%;]+\\)[ \t\n]+\\([^>]+\\)>") 363 "[\t \n]*<!ELEMENT[ \t\n]+\\([a-zA-Z0-9.%;]+\\)[ \t\n]+\\([^>]+\\)>")
367 364
368 (setq element (intern (match-string-no-properties 1)) 365 (setq element (intern (match-string-no-properties 1))
369 type (match-string-no-properties 2)) 366 type (match-string-no-properties 2))
370 (set 'end-pos (match-end 0)) 367 (setq end-pos (match-end 0))
371 368
372 ;; Translation of rule [46] of XML specifications 369 ;; Translation of rule [46] of XML specifications
373 (cond 370 (cond
374 ((string-match "^EMPTY[ \t\n]*$" type) ;; empty declaration 371 ((string-match "^EMPTY[ \t\n]*$" type) ;; empty declaration
375 (set 'type 'empty)) 372 (setq type 'empty))
376 ((string-match "^ANY[ \t\n]*$" type) ;; any type of contents 373 ((string-match "^ANY[ \t\n]*$" type) ;; any type of contents
377 (set 'type 'any)) 374 (setq type 'any))
378 ((string-match "^(\\(.*\\))[ \t\n]*$" type) ;; children ([47]) 375 ((string-match "^(\\(.*\\))[ \t\n]*$" type) ;; children ([47])
379 (set 'type (xml-parse-elem-type (match-string-no-properties 1 type)))) 376 (setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
380 ((string-match "^%[^;]+;[ \t\n]*$" type) ;; substitution 377 ((string-match "^%[^;]+;[ \t\n]*$" type) ;; substitution
381 nil) 378 nil)
382 (t 379 (t
383 (error "XML: Invalid element type in the DTD"))) 380 (error "XML: Invalid element type in the DTD")))
384 381
386 (if (assoc element dtd) 383 (if (assoc element dtd)
387 (error "XML: elements declaration must be unique in a DTD (<%s>)" 384 (error "XML: elements declaration must be unique in a DTD (<%s>)"
388 (symbol-name element))) 385 (symbol-name element)))
389 386
390 ;; Store the element in the DTD 387 ;; Store the element in the DTD
391 (set 'dtd (append dtd (list (list element type)))) 388 (push (list element type) dtd)
392 (goto-char end-pos) 389 (goto-char end-pos))
393 )
394 390
395 391
396 (t 392 (t
397 (error "XML: Invalid DTD item")) 393 (error "XML: Invalid DTD item"))
398 ) 394 )
399 ) 395 )
400 396
401 ;; Skip the end of the DTD 397 ;; Skip the end of the DTD
402 (search-forward ">" end) 398 (search-forward ">" end)
403 dtd 399 (nreverse dtd)))
404 ))
405 400
406 401
407 (defun xml-parse-elem-type (string) 402 (defun xml-parse-elem-type (string)
408 "Convert a STRING for an element type into an elisp structure." 403 "Convert a STRING for an element type into an elisp structure."
409 404
411 (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string) 406 (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string)
412 (progn 407 (progn
413 (setq elem (match-string 1 string) 408 (setq elem (match-string 1 string)
414 modifier (match-string 2 string)) 409 modifier (match-string 2 string))
415 (if (string-match "|" elem) 410 (if (string-match "|" elem)
416 (set 'elem (append '(choice) 411 (setq elem (cons 'choice
417 (mapcar 'xml-parse-elem-type 412 (mapcar 'xml-parse-elem-type
418 (split-string elem "|")))) 413 (split-string elem "|"))))
419 (if (string-match "," elem) 414 (if (string-match "," elem)
420 (set 'elem (append '(seq) 415 (setq elem (cons 'seq
421 (mapcar 'xml-parse-elem-type 416 (mapcar 'xml-parse-elem-type
422 (split-string elem ",")))) 417 (split-string elem ","))))
423 ))) 418 )))
424 (if (string-match "[ \t\n]*\\([^+*?]+\\)\\([+*?]?\\)" string) 419 (if (string-match "[ \t\n]*\\([^+*?]+\\)\\([+*?]?\\)" string)
425 (setq elem (match-string 1 string) 420 (setq elem (match-string 1 string)
426 modifier (match-string 2 string)))) 421 modifier (match-string 2 string))))
427 422
428 (if (and (stringp elem) 423 (if (and (stringp elem) (string= elem "#PCDATA"))
429 (string= elem "#PCDATA")) 424 (setq elem 'pcdata))
430 (set 'elem 'pcdata))
431 425
432 (cond 426 (cond
433 ((string= modifier "+") 427 ((string= modifier "+")
434 (list '+ elem)) 428 (list '+ elem))
435 ((string= modifier "*") 429 ((string= modifier "*")
436 (list '* elem)) 430 (list '* elem))
437 ((string= modifier "?") 431 ((string= modifier "?")
438 (list '? elem)) 432 (list '? elem))
439 (t 433 (t
440 elem)))) 434 elem))))
441 435
442 436
443 ;;******************************************************************* 437 ;;*******************************************************************
444 ;;** 438 ;;**
445 ;;** Substituting special XML sequences 439 ;;** Substituting special XML sequences
447 ;;******************************************************************* 441 ;;*******************************************************************
448 442
449 (defun xml-substitute-special (string) 443 (defun xml-substitute-special (string)
450 "Return STRING, after subsituting special XML sequences." 444 "Return STRING, after subsituting special XML sequences."
451 (while (string-match "&amp;" string) 445 (while (string-match "&amp;" string)
452 (set 'string (replace-match "&" t nil string))) 446 (setq string (replace-match "&" t nil string)))
453 (while (string-match "&lt;" string) 447 (while (string-match "&lt;" string)
454 (set 'string (replace-match "<" t nil string))) 448 (setq string (replace-match "<" t nil string)))
455 (while (string-match "&gt;" string) 449 (while (string-match "&gt;" string)
456 (set 'string (replace-match ">" t nil string))) 450 (setq string (replace-match ">" t nil string)))
457 (while (string-match "&apos;" string) 451 (while (string-match "&apos;" string)
458 (set 'string (replace-match "'" t nil string))) 452 (setq string (replace-match "'" t nil string)))
459 (while (string-match "&quot;" string) 453 (while (string-match "&quot;" string)
460 (set 'string (replace-match "\"" t nil string))) 454 (setq string (replace-match "\"" t nil string)))
461 string) 455 string)
462 456
463 ;;******************************************************************* 457 ;;*******************************************************************
464 ;;** 458 ;;**
465 ;;** Printing a tree. 459 ;;** Printing a tree.
466 ;;** This function is intended mainly for debugging purposes. 460 ;;** This function is intended mainly for debugging purposes.
467 ;;** 461 ;;**
468 ;;******************************************************************* 462 ;;*******************************************************************
469 463
470 (defun xml-debug-print (xml) 464 (defun xml-debug-print (xml)
471 (while xml 465 (dolist (node xml)
472 (xml-debug-print-internal (car xml) "") 466 (xml-debug-print-internal node "")))
473 (set 'xml (cdr xml))) 467
474 ) 468 (defun xml-debug-print-internal (xml indent-string)
475
476 (defun xml-debug-print-internal (xml &optional indent-string)
477 "Outputs the XML tree in the current buffer. 469 "Outputs the XML tree in the current buffer.
478 The first line indented with INDENT-STRING." 470 The first line indented with INDENT-STRING."
479 (let ((tree xml) 471 (let ((tree xml)
480 attlist) 472 attlist)
481 (unless indent-string
482 (set 'indent-string ""))
483
484 (insert indent-string "<" (symbol-name (xml-node-name tree))) 473 (insert indent-string "<" (symbol-name (xml-node-name tree)))
485 474
486 ;; output the attribute list 475 ;; output the attribute list
487 (set 'attlist (xml-node-attributes tree)) 476 (setq attlist (xml-node-attributes tree))
488 (while attlist 477 (while attlist
489 (insert " ") 478 (insert " ")
490 (insert (symbol-name (caar attlist)) "=\"" (cdar attlist) "\"") 479 (insert (symbol-name (caar attlist)) "=\"" (cdar attlist) "\"")
491 (set 'attlist (cdr attlist))) 480 (setq attlist (cdr attlist)))
492 481
493 (insert ">") 482 (insert ">")
494 483
495 (set 'tree (xml-node-children tree)) 484 (setq tree (xml-node-children tree))
496 485
497 ;; output the children 486 ;; output the children
498 (while tree 487 (dolist (node tree)
499 (cond 488 (cond
500 ((listp (car tree)) 489 ((listp node)
501 (insert "\n") 490 (insert "\n")
502 (xml-debug-print-internal (car tree) (concat indent-string " ")) 491 (xml-debug-print-internal node (concat indent-string " ")))
503 ) 492 ((stringp node) (insert node))
504 ((stringp (car tree))
505 (insert (car tree))
506 )
507 (t 493 (t
508 (error "Invalid XML tree"))) 494 (error "Invalid XML tree"))))
509 (set 'tree (cdr tree))
510 )
511 495
512 (insert "\n" indent-string 496 (insert "\n" indent-string
513 "</" (symbol-name (xml-node-name xml)) ">") 497 "</" (symbol-name (xml-node-name xml)) ">")))
514 ))
515 498
516 (provide 'xml) 499 (provide 'xml)
517 500
518 ;;; xml.el ends here 501 ;;; xml.el ends here