comparison lisp/xml.el @ 56375:2e4e974fa50b

2004-07-09 Mark A. Hershberger <mah@everybody.org> * xml.el (xml-maybe-do-ns, xml-parse-tag): Produce elements in the form (("ns" . "element") (attr-list) children) instead of ((:ns . "element") (attr-list) children) in order to reduce the number of symbols used. (xml-skip-dtd): Change to use xml-parse-dtd but set xml-validating-parsing to nil. (xml-parse-dtd): Parse entity deleclarations in DOCTYPEs. (xml-substitute-entity): Remove in favor of new entity substitution. (xml-substitute-special): Rewrite in to substitute complex entities from DOCTYPE declarations. (xml-parse-fragment): Parse fragments from entity deleclarations. (xml-parse-region, xml-parse-tag, xml-parse-attlist) (xml-parse-dtd, xml-substitute-special): Make validity checks conditioned on xml-validating-parser. Add "Not Well Formed" to error messages about well-formedness.
author Mark A. Hershberger <mah@everybody.org>
date Fri, 09 Jul 2004 14:21:12 +0000
parents 7ac80356d84c
children 95ee82562c2a 029a652ac817
comparison
equal deleted inserted replaced
56374:e784f4b6c134 56375:2e4e974fa50b
82 ;;** 82 ;;**
83 ;;** Macros to parse the list 83 ;;** Macros to parse the list
84 ;;** 84 ;;**
85 ;;******************************************************************* 85 ;;*******************************************************************
86 86
87 (defvar xml-entity-alist
88 '(("lt" . "<")
89 ("gt" . ">")
90 ("apos" . "'")
91 ("quot" . "\"")
92 ("amp" . "&"))
93 "The defined entities. Entities are added to this when the DTD is parsed.")
94
95 (defvar xml-sub-parser nil
96 "Dynamically set this to a non-nil value if you want to parse an XML fragment.")
97
98 (defvar xml-validating-parser nil
99 "Set to non-nil to get validity checking.")
100
87 (defsubst xml-node-name (node) 101 (defsubst xml-node-name (node)
88 "Return the tag associated with NODE. 102 "Return the tag associated with NODE.
89 Without namespace-aware parsing, the tag is a symbol. 103 Without namespace-aware parsing, the tag is a symbol.
90 104
91 With namespace-aware parsing, the tag is a cons of a string 105 With namespace-aware parsing, the tag is a cons of a string
161 parse-dtd parse-ns))) 175 parse-dtd parse-ns)))
162 (if keep 176 (if keep
163 (goto-char keep) 177 (goto-char keep)
164 (kill-buffer (current-buffer))) 178 (kill-buffer (current-buffer)))
165 xml))) 179 xml)))
180
181
182 (let* ((start-chars (concat ":[:alpha:]_"))
183 (name-chars (concat "-[:digit:]." start-chars))
184 ;;[3] S ::= (#x20 | #x9 | #xD | #xA)+
185 (whitespace "[ \t\n\r]"))
186 ;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
187 ;; | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
188 ;; | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
189 ;; | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
190 (defvar xml-name-start-char-re (concat "[" start-chars "]"))
191 ;;[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
192 (defvar xml-name-char-re (concat "[" name-chars "]"))
193 ;;[5] Name ::= NameStartChar (NameChar)*
194 (defvar xml-name-re (concat xml-name-start-char-re xml-name-char-re "*"))
195 ;;[6] Names ::= Name (#x20 Name)*
196 (defvar xml-names-re (concat xml-name-re "\\(?: " xml-name-re "\\)*"))
197 ;;[7] Nmtoken ::= (NameChar)+
198 (defvar xml-nmtoken-re (concat xml-name-char-re "+"))
199 ;;[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
200 (defvar xml-nmtokens-re (concat xml-nmtoken-re "\\(?: " xml-name-re "\\)*"))
201 ;;[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
202 (defvar xml-char-ref-re "\\(?:&#[0-9]+;\\|&#x[0-9a-fA-F]+;\\)")
203 ;;[68] EntityRef ::= '&' Name ';'
204 (defvar xml-entity-ref (concat "&" xml-name-re ";"))
205 ;;[69] PEReference ::= '%' Name ';'
206 (defvar xml-pe-reference-re (concat "%" xml-name-re ";"))
207 ;;[67] Reference ::= EntityRef | CharRef
208 (defvar xml-reference-re (concat "\\(?:" xml-entity-ref "\\|" xml-char-ref-re "\\)"))
209 ;;[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
210 ;; | "'" ([^%&'] | PEReference | Reference)* "'"
211 (defvar xml-entity-value-re (concat "\\(?:\"\\(?:[^%&\"]\\|" xml-pe-reference-re
212 "\\|" xml-reference-re "\\)*\"\\|'\\(?:[^%&']\\|"
213 xml-pe-reference-re "\\|" xml-reference-re "\\)*'\\)")))
214 ;;[75] ExternalID ::= 'SYSTEM' S SystemLiteral
215 ;; | 'PUBLIC' S PubidLiteral S SystemLiteral
216 ;;[76] NDataDecl ::= S 'NDATA' S
217 ;;[73] EntityDef ::= EntityValue| (ExternalID NDataDecl?)
218 ;;[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
219 ;;[74] PEDef ::= EntityValue | ExternalID
220 ;;[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
221 ;;[70] EntityDecl ::= GEDecl | PEDecl
166 222
167 ;; Note that this is setup so that we can do whitespace-skipping with 223 ;; Note that this is setup so that we can do whitespace-skipping with
168 ;; `(skip-syntax-forward " ")', inter alia. Previously this was slow 224 ;; `(skip-syntax-forward " ")', inter alia. Previously this was slow
169 ;; compared with `re-search-forward', but that has been fixed. Also 225 ;; compared with `re-search-forward', but that has been fixed. Also
170 ;; note that the standard syntax table contains other characters with 226 ;; note that the standard syntax table contains other characters with
227 (while (not (eobp)) 283 (while (not (eobp))
228 (if (search-forward "<" nil t) 284 (if (search-forward "<" nil t)
229 (progn 285 (progn
230 (forward-char -1) 286 (forward-char -1)
231 (setq result (xml-parse-tag parse-dtd parse-ns)) 287 (setq result (xml-parse-tag parse-dtd parse-ns))
232 (if (and xml result) 288 (if (and xml result (not xml-sub-parser))
233 ;; translation of rule [1] of XML specifications 289 ;; translation of rule [1] of XML specifications
234 (error "XML files can have only one toplevel tag") 290 (error "XML: (Not Well-Formed) Only one root tag allowed")
235 (cond 291 (cond
236 ((null result)) 292 ((null result))
237 ((and (listp (car result)) 293 ((and (listp (car result))
238 parse-dtd) 294 parse-dtd)
239 (setq dtd (car result)) 295 (setq dtd (car result))
263 (special (and (string-equal lname "xmlns") (not prefix))) 319 (special (and (string-equal lname "xmlns") (not prefix)))
264 ;; Setting default to nil will insure that there is not 320 ;; Setting default to nil will insure that there is not
265 ;; matching cons in xml-ns. In which case we 321 ;; matching cons in xml-ns. In which case we
266 (ns (or (cdr (assoc (if special "xmlns" prefix) 322 (ns (or (cdr (assoc (if special "xmlns" prefix)
267 xml-ns)) 323 xml-ns))
268 :))) 324 "")))
269 (cons ns (if special "" lname))) 325 (cons ns (if special "" lname)))
270 (intern name))) 326 (intern name)))
327
328 (defun xml-parse-fragment (&optional parse-dtd parse-ns)
329 "Parse xml-like fragments."
330 (let ((xml-sub-parser t)
331 children)
332 (while (not (eobp))
333 (let ((bit (xml-parse-tag
334 parse-dtd parse-ns)))
335 (if children
336 (setq children (append (list bit) children))
337 (if (stringp bit)
338 (setq children (list bit))
339 (setq children bit)))))
340 (reverse children)))
271 341
272 (defun xml-parse-tag (&optional parse-dtd parse-ns) 342 (defun xml-parse-tag (&optional parse-dtd parse-ns)
273 "Parse the tag at point. 343 "Parse the tag at point.
274 If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and 344 If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
275 returned as the first element in the list. 345 returned as the first element in the list.
276 If PARSE-NS is non-nil, then QNAMES are expanded. 346 If PARSE-NS is non-nil, then QNAMES are expanded.
277 Returns one of: 347 Returns one of:
278 - a list : the matching node 348 - a list : the matching node
279 - nil : the point is not looking at a tag. 349 - nil : the point is not looking at a tag.
280 - a pair : the first element is the DTD, the second is the node." 350 - a pair : the first element is the DTD, the second is the node."
281 (let ((xml-ns (if (consp parse-ns) 351 (let ((xml-validating-parser (or parse-dtd xml-validating-parser))
352 (xml-ns (if (consp parse-ns)
282 parse-ns 353 parse-ns
283 (if parse-ns 354 (if parse-ns
284 (list 355 (list
285 ;; Default for empty prefix is no namespace 356 ;; Default for empty prefix is no namespace
286 (cons "" :) 357 (cons "" "")
287 ;; "xml" namespace 358 ;; "xml" namespace
288 (cons "xml" :http://www.w3.org/XML/1998/namespace) 359 (cons "xml" "http://www.w3.org/XML/1998/namespace")
289 ;; We need to seed the xmlns namespace 360 ;; We need to seed the xmlns namespace
290 (cons "xmlns" :http://www.w3.org/2000/xmlns/)))))) 361 (cons "xmlns" "http://www.w3.org/2000/xmlns/"))))))
291 (cond 362 (cond
292 ;; Processing instructions (like the <?xml version="1.0"?> tag at the 363 ;; Processing instructions (like the <?xml version="1.0"?> tag at the
293 ;; beginning of a document). 364 ;; beginning of a document).
294 ((looking-at "<\\?") 365 ((looking-at "<\\?")
295 (search-forward "?>") 366 (search-forward "?>")
297 (xml-parse-tag parse-dtd xml-ns)) 368 (xml-parse-tag parse-dtd xml-ns))
298 ;; Character data (CDATA) sections, in which no tag should be interpreted 369 ;; Character data (CDATA) sections, in which no tag should be interpreted
299 ((looking-at "<!\\[CDATA\\[") 370 ((looking-at "<!\\[CDATA\\[")
300 (let ((pos (match-end 0))) 371 (let ((pos (match-end 0)))
301 (unless (search-forward "]]>" nil t) 372 (unless (search-forward "]]>" nil t)
302 (error "CDATA section does not end anywhere in the document")) 373 (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document"))
303 (buffer-substring pos (match-beginning 0)))) 374 (buffer-substring pos (match-beginning 0))))
304 ;; DTD for the document 375 ;; DTD for the document
305 ((looking-at "<!DOCTYPE") 376 ((looking-at "<!DOCTYPE")
306 (let (dtd) 377 (let ((dtd (xml-parse-dtd parse-ns)))
307 (if parse-dtd 378 (skip-syntax-forward " ")
308 (setq dtd (xml-parse-dtd)) 379 (if xml-validating-parser
309 (xml-skip-dtd)) 380 (cons dtd (xml-parse-tag nil xml-ns))
310 (skip-syntax-forward " ") 381 (xml-parse-tag nil xml-ns))))
311 (if dtd
312 (cons dtd (xml-parse-tag nil xml-ns))
313 (xml-parse-tag nil xml-ns))))
314 ;; skip comments 382 ;; skip comments
315 ((looking-at "<!--") 383 ((looking-at "<!--")
316 (search-forward "-->") 384 (search-forward "-->")
317 nil) 385 nil)
318 ;; end tag 386 ;; end tag
330 398
331 ;; add the xmlns:* attrs to our cache 399 ;; add the xmlns:* attrs to our cache
332 (when (consp xml-ns) 400 (when (consp xml-ns)
333 (dolist (attr attrs) 401 (dolist (attr attrs)
334 (when (and (consp (car attr)) 402 (when (and (consp (car attr))
335 (eq :http://www.w3.org/2000/xmlns/ 403 (equal "http://www.w3.org/2000/xmlns/"
336 (caar attr))) 404 (caar attr)))
337 (push (cons (cdar attr) (intern (concat ":" (cdr attr)))) 405 (push (cons (cdar attr) (cdr attr))
338 xml-ns)))) 406 xml-ns))))
339 407
340 (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns))) 408 (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))
341 409
342 ;; is this an empty element ? 410 ;; is this an empty element ?
343 (if (looking-at "/>") 411 (if (looking-at "/>")
344 (progn
345 (forward-char 2)
346 (nreverse children))
347
348 ;; is this a valid start tag ?
349 (if (eq (char-after) ?>)
350 (progn 412 (progn
351 (forward-char 1) 413 (forward-char 2)
352 ;; Now check that we have the right end-tag. Note that this
353 ;; one might contain spaces after the tag name
354 (let ((end (concat "</" node-name "\\s-*>")))
355 (while (not (looking-at end))
356 (cond
357 ((looking-at "</")
358 (error "XML: Invalid end tag (expecting %s) at pos %d"
359 node-name (point)))
360 ((= (char-after) ?<)
361 (let ((tag (xml-parse-tag nil xml-ns)))
362 (when tag
363 (push tag children))))
364 (t
365 (setq pos (point))
366 (search-forward "<")
367 (forward-char -1)
368 (let ((string (buffer-substring pos (point)))
369 (pos 0))
370
371 ;; Clean up the string. As per XML
372 ;; specifications, the XML processor should
373 ;; always pass the whole string to the
374 ;; application. But \r's should be replaced:
375 ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
376 (while (string-match "\r\n?" string pos)
377 (setq string (replace-match "\n" t t string))
378 (setq pos (1+ (match-beginning 0))))
379
380 (setq string (xml-substitute-special string))
381 (setq children
382 (if (stringp (car children))
383 ;; The two strings were separated by a comment.
384 (cons (concat (car children) string)
385 (cdr children))
386 (cons string children))))))))
387
388 (goto-char (match-end 0))
389 (nreverse children)) 414 (nreverse children))
390 ;; This was an invalid start tag 415
391 (error "XML: Invalid attribute list"))))) 416 ;; is this a valid start tag ?
392 (t ;; This is not a tag. 417 (if (eq (char-after) ?>)
393 (error "XML: Invalid character"))))) 418 (progn
419 (forward-char 1)
420 ;; Now check that we have the right end-tag. Note that this
421 ;; one might contain spaces after the tag name
422 (let ((end (concat "</" node-name "\\s-*>")))
423 (while (not (looking-at end))
424 (cond
425 ((looking-at "</")
426 (error "XML: (Not Well-Formed) Invalid end tag (expecting %s) at pos %d"
427 node-name (point)))
428 ((= (char-after) ?<)
429 (let ((tag (xml-parse-tag nil xml-ns)))
430 (when tag
431 (push tag children))))
432 (t
433 (let ((expansion (xml-parse-string)))
434 (setq children
435 (if (stringp expansion)
436 (if (stringp (car children))
437 ;; The two strings were separated by a comment.
438 (setq children (append (concat (car children) expansion)
439 (cdr children)))
440 (setq children (append (list expansion) children)))
441 (setq children (append expansion children))))))))
442
443 (goto-char (match-end 0))
444 (nreverse children)))
445 ;; This was an invalid start tag (Expected ">", but didn't see it.)
446 (error "XML: (Well-Formed) Couldn't parse tag: %s"
447 (buffer-substring (- (point) 10) (+ (point) 1)))))))
448 (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
449 (unless xml-sub-parser ; Usually, we error out.
450 (error "XML: (Well-Formed) Invalid character"))
451
452 ;; However, if we're parsing incrementally, then we need to deal
453 ;; with stray CDATA.
454 (xml-parse-string)))))
455
456 (defun xml-parse-string ()
457 "Parse the next whatever. Could be a string, or an element."
458 (let* ((pos (point))
459 (string (progn (if (search-forward "<" nil t)
460 (forward-char -1)
461 (goto-char (point-max)))
462 (buffer-substring pos (point)))))
463 ;; Clean up the string. As per XML specifications, the XML
464 ;; processor should always pass the whole string to the
465 ;; application. But \r's should be replaced:
466 ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends
467 (setq pos 0)
468 (while (string-match "\r\n?" string pos)
469 (setq string (replace-match "\n" t t string))
470 (setq pos (1+ (match-beginning 0))))
471
472 (xml-substitute-special string)))
394 473
395 (defun xml-parse-attlist (&optional xml-ns) 474 (defun xml-parse-attlist (&optional xml-ns)
396 "Return the attribute-list after point. 475 "Return the attribute-list after point.
397 Leave point at the first non-blank character after the tag." 476 Leave point at the first non-blank character after the tag."
398 (let ((attlist ()) 477 (let ((attlist ())
410 ;; or a simple word ? 489 ;; or a simple word ?
411 (if (looking-at "\"\\([^\"]*\\)\"") 490 (if (looking-at "\"\\([^\"]*\\)\"")
412 (setq end-pos (match-end 0)) 491 (setq end-pos (match-end 0))
413 (if (looking-at "'\\([^']*\\)'") 492 (if (looking-at "'\\([^']*\\)'")
414 (setq end-pos (match-end 0)) 493 (setq end-pos (match-end 0))
415 (error "XML: Attribute values must be given between quotes"))) 494 (error "XML: (Not Well-Formed) Attribute values must be given between quotes")))
416 495
417 ;; Each attribute must be unique within a given element 496 ;; Each attribute must be unique within a given element
418 (if (assoc name attlist) 497 (if (assoc name attlist)
419 (error "XML: each attribute must be unique within an element")) 498 (error "XML: (Not Well-Formed) Each attribute must be unique within an element"))
420 499
421 ;; Multiple whitespace characters should be replaced with a single one 500 ;; Multiple whitespace characters should be replaced with a single one
422 ;; in the attributes 501 ;; in the attributes
423 (let ((string (match-string 1)) 502 (let ((string (match-string 1))
424 (pos 0)) 503 (pos 0))
425 (replace-regexp-in-string "\\s-\\{2,\\}" " " string) 504 (replace-regexp-in-string "\\s-\\{2,\\}" " " string)
426 (push (cons name (xml-substitute-special string)) attlist)) 505 (let ((expansion (xml-substitute-special string)))
506 (unless (stringp expansion)
507 ; We say this is the constraint. It is acctually that
508 ; external entities nor "<" can be in an attribute value.
509 (error "XML: (Not Well-Formed) Entities in attributes cannot expand into elements"))
510 (push (cons name expansion) attlist)))
427 511
428 (goto-char end-pos) 512 (goto-char end-pos)
429 (skip-syntax-forward " ")) 513 (skip-syntax-forward " "))
430 (nreverse attlist))) 514 (nreverse attlist)))
431 515
440 ;; Fixme: This fails at least if the DTD contains conditional sections. 524 ;; Fixme: This fails at least if the DTD contains conditional sections.
441 525
442 (defun xml-skip-dtd () 526 (defun xml-skip-dtd ()
443 "Skip the DTD at point. 527 "Skip the DTD at point.
444 This follows the rule [28] in the XML specifications." 528 This follows the rule [28] in the XML specifications."
445 (forward-char (length "<!DOCTYPE")) 529 (let ((xml-validating-parser nil))
446 (if (looking-at "\\s-*>") 530 (xml-parse-dtd)))
447 (error "XML: invalid DTD (excepting name of the document)")) 531
448 (condition-case nil 532 (defun xml-parse-dtd (&optional parse-ns)
449 (progn
450 (forward-sexp)
451 (skip-syntax-forward " ")
452 (if (looking-at "\\[")
453 (re-search-forward "]\\s-*>")
454 (search-forward ">")))
455 (error (error "XML: No end to the DTD"))))
456
457 (defun xml-parse-dtd ()
458 "Parse the DTD at point." 533 "Parse the DTD at point."
459 (forward-char (eval-when-compile (length "<!DOCTYPE"))) 534 (forward-char (eval-when-compile (length "<!DOCTYPE")))
460 (skip-syntax-forward " ") 535 (skip-syntax-forward " ")
461 (if (looking-at ">") 536 (if (and (looking-at ">")
462 (error "XML: invalid DTD (excepting name of the document)")) 537 xml-validating-parser)
538 (error "XML: (Validity) Invalid DTD (expecting name of the document)"))
463 539
464 ;; Get the name of the document 540 ;; Get the name of the document
465 (looking-at xml-name-regexp) 541 (looking-at xml-name-regexp)
466 (let ((dtd (list (match-string 0) 'dtd)) 542 (let ((dtd (list (match-string 0) 'dtd))
467 type element end-pos) 543 type element end-pos)
475 "\\=\"\\([[:space:][:alnum:]-'()+,./:=?;!*#@$_%]*\\)\"" 551 "\\=\"\\([[:space:][:alnum:]-'()+,./:=?;!*#@$_%]*\\)\""
476 nil t) 552 nil t)
477 (re-search-forward 553 (re-search-forward
478 "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" 554 "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'"
479 nil t)) 555 nil t))
480 (error "XML: missing public id")) 556 (error "XML: Missing Public ID"))
481 (let ((pubid (match-string 1))) 557 (let ((pubid (match-string 1)))
558 (skip-syntax-forward " ")
482 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) 559 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
483 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) 560 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
484 (error "XML: missing system id")) 561 (error "XML: Missing System ID"))
485 (push (list pubid (match-string 1) 'public) dtd))) 562 (push (list pubid (match-string 1) 'public) dtd)))
486 ((looking-at "SYSTEM\\s-+") 563 ((looking-at "SYSTEM\\s-+")
487 (goto-char (match-end 0)) 564 (goto-char (match-end 0))
488 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) 565 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
489 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) 566 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
490 (error "XML: missing system id")) 567 (error "XML: Missing System ID"))
491 (push (list (match-string 1) 'system) dtd))) 568 (push (list (match-string 1) 'system) dtd)))
492 (skip-syntax-forward " ") 569 (skip-syntax-forward " ")
493 (if (eq ?> (char-after)) 570 (if (eq ?> (char-after))
494 (forward-char) 571 (forward-char)
495 (skip-syntax-forward " ")
496 (if (not (eq (char-after) ?\[)) 572 (if (not (eq (char-after) ?\[))
497 (error "XML: bad DTD") 573 (error "XML: Bad DTD")
498 (forward-char) 574 (forward-char)
499 ;; Parse the rest of the DTD 575 ;; Parse the rest of the DTD
500 ;; Fixme: Deal with ENTITY, ATTLIST, NOTATION, PIs. 576 ;; Fixme: Deal with ATTLIST, NOTATION, PIs.
501 (while (not (looking-at "\\s-*\\]")) 577 (while (not (looking-at "\\s-*\\]"))
502 (skip-syntax-forward " ") 578 (skip-syntax-forward " ")
503 (cond 579 (cond
504 580
505 ;; Translation of rule [45] of XML specifications 581 ;; Translation of rule [45] of XML specifications
519 ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) 595 ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
520 (setq type (xml-parse-elem-type (match-string 1 type)))) 596 (setq type (xml-parse-elem-type (match-string 1 type))))
521 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution 597 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
522 nil) 598 nil)
523 (t 599 (t
524 (error "XML: Invalid element type in the DTD"))) 600 (if xml-validating-parser
601 error "XML: (Validity) Invalid element type in the DTD")))
525 602
526 ;; rule [45]: the element declaration must be unique 603 ;; rule [45]: the element declaration must be unique
527 (if (assoc element dtd) 604 (if (and (assoc element dtd)
528 (error "XML: element declarations must be unique in a DTD (<%s>)" 605 xml-validating-parser)
606 (error "XML: (Validity) Element declarations must be unique in a DTD (<%s>)"
529 element)) 607 element))
530 608
531 ;; Store the element in the DTD 609 ;; Store the element in the DTD
532 (push (list element type) dtd) 610 (push (list element type) dtd)
533 (goto-char end-pos)) 611 (goto-char end-pos))
534 ((looking-at "<!--") 612 ((looking-at "<!--")
535 (search-forward "-->")) 613 (search-forward "-->"))
536 614 ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re
615 "\\)[ \t\n\r]*\\(" xml-entity-value-re
616 "\\)[ \t\n\r]*>"))
617 (let ((name (buffer-substring (nth 2 (match-data))
618 (nth 3 (match-data))))
619 (value (buffer-substring (+ (nth 4 (match-data)) 1)
620 (- (nth 5 (match-data)) 1))))
621 (goto-char (nth 1 (match-data)))
622 (setq xml-entity-alist
623 (append xml-entity-alist
624 (list (cons name
625 (with-temp-buffer
626 (insert value)
627 (goto-char (point-min))
628 (xml-parse-fragment
629 xml-validating-parser
630 parse-ns))))))))
631 ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re
632 "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
633 "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>"))
634 (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re
635 "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
636 "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
637 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
638 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
639 "[ \t\n\r]*>")))
640 (let ((name (buffer-substring (nth 2 (match-data))
641 (nth 3 (match-data))))
642 (file (buffer-substring (+ (nth 4 (match-data)) 1)
643 (- (nth 5 (match-data)) 1))))
644 (goto-char (nth 1 (match-data)))
645 (setq xml-entity-alist
646 (append xml-entity-alist
647 (list (cons name (with-temp-buffer
648 (insert-file-contents file)
649 (goto-char (point-min))
650 (xml-parse-fragment
651 xml-validating-parser
652 parse-ns))))))))
537 (t 653 (t
538 (error "XML: Invalid DTD item"))) 654 (error "XML: (Validity) Invalid DTD item")))))
539 655 (if (looking-at "\\s-*]>")
540 ;; Skip the end of the DTD 656 (goto-char (nth 1 (match-data)))))
541 (search-forward ">"))))
542 (nreverse dtd))) 657 (nreverse dtd)))
543 658
544 (defun xml-parse-elem-type (string) 659 (defun xml-parse-elem-type (string)
545 "Convert element type STRING into a Lisp structure." 660 "Convert element type STRING into a Lisp structure."
546 661
578 ;;** 693 ;;**
579 ;;** Substituting special XML sequences 694 ;;** Substituting special XML sequences
580 ;;** 695 ;;**
581 ;;******************************************************************* 696 ;;*******************************************************************
582 697
583 (eval-when-compile
584 (defvar str)) ; dynamic from replace-regexp-in-string
585
586 ;; Fixme: Take declared entities from the DTD when they're available.
587 (defun xml-substitute-entity (match)
588 "Subroutine of `xml-substitute-special'."
589 (save-match-data
590 (let ((match1 (match-string 1 str)))
591 (cond ((string= match1 "lt") "<")
592 ((string= match1 "gt") ">")
593 ((string= match1 "apos") "'")
594 ((string= match1 "quot") "\"")
595 ((string= match1 "amp") "&")
596 ((and (string-match "#\\([0-9]+\\)" match1)
597 (let ((c (decode-char
598 'ucs
599 (string-to-number (match-string 1 match1)))))
600 (if c (string c))))) ; else unrepresentable
601 ((and (string-match "#x\\([[:xdigit:]]+\\)" match1)
602 (let ((c (decode-char
603 'ucs
604 (string-to-number (match-string 1 match1) 16))))
605 (if c (string c)))))
606 ;; Default to asis. Arguably, unrepresentable code points
607 ;; might be best replaced with U+FFFD.
608 (t match)))))
609
610 (defun xml-substitute-special (string) 698 (defun xml-substitute-special (string)
611 "Return STRING, after subsituting entity references." 699 "Return STRING, after subsituting entity references."
612 ;; This originally made repeated passes through the string from the 700 ;; This originally made repeated passes through the string from the
613 ;; beginning, which isn't correct, since then either "&amp;amp;" or 701 ;; beginning, which isn't correct, since then either "&amp;amp;" or
614 ;; "&#38;amp;" won't DTRT. 702 ;; "&#38;amp;" won't DTRT.
615 (replace-regexp-in-string "&\\([^;]+\\);" 703
616 #'xml-substitute-entity string t t)) 704 (let ((point 0)
617 705 children end-point)
706 (while (string-match "&\\([^;]+\\);" string point)
707 (setq end-point (match-end 0))
708 (let* ((this-part (match-string 1 string))
709 (prev-part (substring string point (match-beginning 0)))
710 (entity (assoc this-part xml-entity-alist))
711 (expansion
712 (cond ((string-match "#\\([0-9]+\\)" this-part)
713 (let ((c (decode-char
714 'ucs
715 (string-to-number (match-string 1 this-part)))))
716 (if c (string c))))
717 ((string-match "#x\\([[:xdigit:]]+\\)" this-part)
718 (let ((c (decode-char
719 'ucs
720 (string-to-number (match-string 1 this-part) 16))))
721 (if c (string c))))
722 (entity
723 (cdr entity))
724 (t
725 (if xml-validating-parser
726 (error "XML: (Validity) Undefined entity `%s'"
727 (match-string 1 this-part)))))))
728
729 (cond ((null children)
730 (if (stringp expansion)
731 (setq children (concat prev-part expansion))
732 (if (stringp (car (last expansion)))
733 (progn
734 (setq children
735 (list (concat prev-part (car expansion))
736 (cdr expansion))))
737 (setq children (append expansion prev-part)))))
738 ((stringp children)
739 (if (stringp expansion)
740 (setq children (concat children prev-part expansion))
741 (setq children (list expansion (concat prev-part children)))))
742 ((and (stringp expansion)
743 (stringp (car children)))
744 (setcar children (concat prev-part expansion (car children))))
745 ((stringp expansion)
746 (setq children (append (concat prev-part expansion)
747 children)))
748 ((stringp (car children))
749 (setcar children (concat (car children) prev-part))
750 (setq children (append expansion children)))
751 (t
752 (setq children (list expansion
753 prev-part
754 children))))
755 (setq point end-point)))
756 (cond ((stringp children)
757 (concat children (substring string point)))
758 ((stringp (car (last children)))
759 (concat (car children) (substring string point)))
760 ((null children)
761 string)
762 (t
763 (nreverse children)))))
618 ;;******************************************************************* 764 ;;*******************************************************************
619 ;;** 765 ;;**
620 ;;** Printing a tree. 766 ;;** Printing a tree.
621 ;;** This function is intended mainly for debugging purposes. 767 ;;** This function is intended mainly for debugging purposes.
622 ;;** 768 ;;**