Mercurial > emacs
comparison lisp/xml.el @ 90789:c0409ee15cee
Merge from emacs--devo--0
Patches applied:
* emacs--devo--0 (patch 670-674)
- Update from CVS
- Merge from gnus--rel--5.10
* gnus--rel--5.10 (patch 209-210)
- Merge from emacs--devo--0
- Update from CVS
Revision: emacs@sv.gnu.org/emacs--unicode--0--patch-185
author | Miles Bader <miles@gnu.org> |
---|---|
date | Wed, 21 Mar 2007 13:33:07 +0000 |
parents | 95d0cdf160ea d1760553962c |
children | f55f9811f5d7 |
comparison
equal
deleted
inserted
replaced
90788:a12805fdabe8 | 90789:c0409ee15cee |
---|---|
74 ;; * more complete DOCTYPE parsing | 74 ;; * more complete DOCTYPE parsing |
75 ;; * pi support | 75 ;; * pi support |
76 | 76 |
77 ;;; Code: | 77 ;;; Code: |
78 | 78 |
79 ;; Note that {buffer-substring,match-string}-no-properties were | 79 ;; Note that buffer-substring and match-string were formerly used in |
80 ;; formerly used in several places, but that removes composition info. | 80 ;; several places, because the -no-properties variants remove |
81 ;; composition info. However, after some discussion on emacs-devel, | |
82 ;; the consensus was that the speed of the -no-properties variants was | |
83 ;; a worthwhile tradeoff especially since we're usually parsing files | |
84 ;; instead of hand-crafted XML. | |
81 | 85 |
82 ;;******************************************************************* | 86 ;;******************************************************************* |
83 ;;** | 87 ;;** |
84 ;;** Macros to parse the list | 88 ;;** Macros to parse the list |
85 ;;** | 89 ;;** |
404 ((looking-at "<!\\[CDATA\\[") | 408 ((looking-at "<!\\[CDATA\\[") |
405 (let ((pos (match-end 0))) | 409 (let ((pos (match-end 0))) |
406 (unless (search-forward "]]>" nil t) | 410 (unless (search-forward "]]>" nil t) |
407 (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document")) | 411 (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document")) |
408 (concat | 412 (concat |
409 (buffer-substring pos (match-beginning 0)) | 413 (buffer-substring-no-properties pos (match-beginning 0)) |
410 (xml-parse-string)))) | 414 (xml-parse-string)))) |
411 ;; DTD for the document | 415 ;; DTD for the document |
412 ((looking-at "<!DOCTYPE") | 416 ((looking-at "<!DOCTYPE") |
413 (let ((dtd (xml-parse-dtd parse-ns))) | 417 (let ((dtd (xml-parse-dtd parse-ns))) |
414 (skip-syntax-forward " ") | 418 (skip-syntax-forward " ") |
425 ;; opening tag | 429 ;; opening tag |
426 ((looking-at "<\\([^/>[:space:]]+\\)") | 430 ((looking-at "<\\([^/>[:space:]]+\\)") |
427 (goto-char (match-end 1)) | 431 (goto-char (match-end 1)) |
428 | 432 |
429 ;; Parse this node | 433 ;; Parse this node |
430 (let* ((node-name (match-string 1)) | 434 (let* ((node-name (match-string-no-properties 1)) |
431 ;; Parse the attribute list. | 435 ;; Parse the attribute list. |
432 (attrs (xml-parse-attlist xml-ns)) | 436 (attrs (xml-parse-attlist xml-ns)) |
433 children pos) | 437 children pos) |
434 | 438 |
435 ;; add the xmlns:* attrs to our cache | 439 ;; add the xmlns:* attrs to our cache |
478 | 482 |
479 (goto-char (match-end 0)) | 483 (goto-char (match-end 0)) |
480 (nreverse children))) | 484 (nreverse children))) |
481 ;; This was an invalid start tag (Expected ">", but didn't see it.) | 485 ;; This was an invalid start tag (Expected ">", but didn't see it.) |
482 (error "XML: (Well-Formed) Couldn't parse tag: %s" | 486 (error "XML: (Well-Formed) Couldn't parse tag: %s" |
483 (buffer-substring (- (point) 10) (+ (point) 1))))))) | 487 (buffer-substring-no-properties (- (point) 10) (+ (point) 1))))))) |
484 (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag) | 488 (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag) |
485 (unless xml-sub-parser ; Usually, we error out. | 489 (unless xml-sub-parser ; Usually, we error out. |
486 (error "XML: (Well-Formed) Invalid character")) | 490 (error "XML: (Well-Formed) Invalid character")) |
487 | 491 |
488 ;; However, if we're parsing incrementally, then we need to deal | 492 ;; However, if we're parsing incrementally, then we need to deal |
493 "Parse the next whatever. Could be a string, or an element." | 497 "Parse the next whatever. Could be a string, or an element." |
494 (let* ((pos (point)) | 498 (let* ((pos (point)) |
495 (string (progn (if (search-forward "<" nil t) | 499 (string (progn (if (search-forward "<" nil t) |
496 (forward-char -1) | 500 (forward-char -1) |
497 (goto-char (point-max))) | 501 (goto-char (point-max))) |
498 (buffer-substring pos (point))))) | 502 (buffer-substring-no-properties pos (point))))) |
499 ;; Clean up the string. As per XML specifications, the XML | 503 ;; Clean up the string. As per XML specifications, the XML |
500 ;; processor should always pass the whole string to the | 504 ;; processor should always pass the whole string to the |
501 ;; application. But \r's should be replaced: | 505 ;; application. But \r's should be replaced: |
502 ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends | 506 ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends |
503 (setq pos 0) | 507 (setq pos 0) |
514 end-pos name) | 518 end-pos name) |
515 (skip-syntax-forward " ") | 519 (skip-syntax-forward " ") |
516 (while (looking-at (eval-when-compile | 520 (while (looking-at (eval-when-compile |
517 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) | 521 (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) |
518 (setq end-pos (match-end 0)) | 522 (setq end-pos (match-end 0)) |
519 (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns)) | 523 (setq name (xml-maybe-do-ns (match-string-no-properties 1) nil xml-ns)) |
520 (goto-char end-pos) | 524 (goto-char end-pos) |
521 | 525 |
522 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize | 526 ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize |
523 | 527 |
524 ;; Do we have a string between quotes (or double-quotes), | 528 ;; Do we have a string between quotes (or double-quotes), |
533 (if (assoc name attlist) | 537 (if (assoc name attlist) |
534 (error "XML: (Not Well-Formed) Each attribute must be unique within an element")) | 538 (error "XML: (Not Well-Formed) Each attribute must be unique within an element")) |
535 | 539 |
536 ;; Multiple whitespace characters should be replaced with a single one | 540 ;; Multiple whitespace characters should be replaced with a single one |
537 ;; in the attributes | 541 ;; in the attributes |
538 (let ((string (match-string 1)) | 542 (let ((string (match-string-no-properties 1)) |
539 (pos 0)) | 543 (pos 0)) |
540 (replace-regexp-in-string "\\s-\\{2,\\}" " " string) | 544 (replace-regexp-in-string "\\s-\\{2,\\}" " " string) |
541 (let ((expansion (xml-substitute-special string))) | 545 (let ((expansion (xml-substitute-special string))) |
542 (unless (stringp expansion) | 546 (unless (stringp expansion) |
543 ; We say this is the constraint. It is acctually that | 547 ; We say this is the constraint. It is acctually that |
573 xml-validating-parser) | 577 xml-validating-parser) |
574 (error "XML: (Validity) Invalid DTD (expecting name of the document)")) | 578 (error "XML: (Validity) Invalid DTD (expecting name of the document)")) |
575 | 579 |
576 ;; Get the name of the document | 580 ;; Get the name of the document |
577 (looking-at xml-name-regexp) | 581 (looking-at xml-name-regexp) |
578 (let ((dtd (list (match-string 0) 'dtd)) | 582 (let ((dtd (list (match-string-no-properties 0) 'dtd)) |
579 type element end-pos) | 583 type element end-pos) |
580 (goto-char (match-end 0)) | 584 (goto-char (match-end 0)) |
581 | 585 |
582 (skip-syntax-forward " ") | 586 (skip-syntax-forward " ") |
583 ;; XML [75] | 587 ;; XML [75] |
588 nil t) | 592 nil t) |
589 (re-search-forward | 593 (re-search-forward |
590 "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" | 594 "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" |
591 nil t)) | 595 nil t)) |
592 (error "XML: Missing Public ID")) | 596 (error "XML: Missing Public ID")) |
593 (let ((pubid (match-string 1))) | 597 (let ((pubid (match-string-no-properties 1))) |
594 (skip-syntax-forward " ") | 598 (skip-syntax-forward " ") |
595 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) | 599 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) |
596 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) | 600 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) |
597 (error "XML: Missing System ID")) | 601 (error "XML: Missing System ID")) |
598 (push (list pubid (match-string 1) 'public) dtd))) | 602 (push (list pubid (match-string-no-properties 1) 'public) dtd))) |
599 ((looking-at "SYSTEM\\s-+") | 603 ((looking-at "SYSTEM\\s-+") |
600 (goto-char (match-end 0)) | 604 (goto-char (match-end 0)) |
601 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) | 605 (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) |
602 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) | 606 (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) |
603 (error "XML: Missing System ID")) | 607 (error "XML: Missing System ID")) |
604 (push (list (match-string 1) 'system) dtd))) | 608 (push (list (match-string-no-properties 1) 'system) dtd))) |
605 (skip-syntax-forward " ") | 609 (skip-syntax-forward " ") |
606 (if (eq ?> (char-after)) | 610 (if (eq ?> (char-after)) |
607 (forward-char) | 611 (forward-char) |
608 (if (not (eq (char-after) ?\[)) | 612 (if (not (eq (char-after) ?\[)) |
609 (error "XML: Bad DTD") | 613 (error "XML: Bad DTD") |
616 | 620 |
617 ;; Translation of rule [45] of XML specifications | 621 ;; Translation of rule [45] of XML specifications |
618 ((looking-at | 622 ((looking-at |
619 "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") | 623 "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") |
620 | 624 |
621 (setq element (match-string 1) | 625 (setq element (match-string-no-properties 1) |
622 type (match-string-no-properties 2)) | 626 type (match-string-no-properties 2)) |
623 (setq end-pos (match-end 0)) | 627 (setq end-pos (match-end 0)) |
624 | 628 |
625 ;; Translation of rule [46] of XML specifications | 629 ;; Translation of rule [46] of XML specifications |
626 (cond | 630 (cond |
627 ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration | 631 ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration |
628 (setq type 'empty)) | 632 (setq type 'empty)) |
629 ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents | 633 ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents |
630 (setq type 'any)) | 634 (setq type 'any)) |
631 ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) | 635 ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) |
632 (setq type (xml-parse-elem-type (match-string 1 type)))) | 636 (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) |
633 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution | 637 ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution |
634 nil) | 638 nil) |
635 (t | 639 (t |
636 (if xml-validating-parser | 640 (if xml-validating-parser |
637 (error "XML: (Validity) Invalid element type in the DTD")))) | 641 (error "XML: (Validity) Invalid element type in the DTD")))) |
657 ((looking-at "<!--") | 661 ((looking-at "<!--") |
658 (search-forward "-->")) | 662 (search-forward "-->")) |
659 ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re | 663 ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re |
660 "\\)[ \t\n\r]*\\(" xml-entity-value-re | 664 "\\)[ \t\n\r]*\\(" xml-entity-value-re |
661 "\\)[ \t\n\r]*>")) | 665 "\\)[ \t\n\r]*>")) |
662 (let ((name (match-string 1)) | 666 (let ((name (match-string-no-properties 1)) |
663 (value (substring (match-string 2) 1 | 667 (value (substring (match-string-no-properties 2) 1 |
664 (- (length (match-string 2)) 1)))) | 668 (- (length (match-string-no-properties 2)) 1)))) |
665 (goto-char (match-end 0)) | 669 (goto-char (match-end 0)) |
666 (setq xml-entity-alist | 670 (setq xml-entity-alist |
667 (append xml-entity-alist | 671 (append xml-entity-alist |
668 (list (cons name | 672 (list (cons name |
669 (with-temp-buffer | 673 (with-temp-buffer |
679 "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" | 683 "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" |
680 "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" | 684 "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" |
681 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" | 685 "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" |
682 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" | 686 "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" |
683 "[ \t\n\r]*>"))) | 687 "[ \t\n\r]*>"))) |
684 (let ((name (match-string 1)) | 688 (let ((name (match-string-no-properties 1)) |
685 (file (substring (match-string 2) 1 | 689 (file (substring (match-string-no-properties 2) 1 |
686 (- (length (match-string 2)) 1)))) | 690 (- (length (match-string-no-properties 2)) 1)))) |
687 (goto-char (match-end 0)) | 691 (goto-char (match-end 0)) |
688 (setq xml-entity-alist | 692 (setq xml-entity-alist |
689 (append xml-entity-alist | 693 (append xml-entity-alist |
690 (list (cons name (with-temp-buffer | 694 (list (cons name (with-temp-buffer |
691 (insert-file-contents file) | 695 (insert-file-contents file) |
720 "Convert element type STRING into a Lisp structure." | 724 "Convert element type STRING into a Lisp structure." |
721 | 725 |
722 (let (elem modifier) | 726 (let (elem modifier) |
723 (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string) | 727 (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string) |
724 (progn | 728 (progn |
725 (setq elem (match-string 1 string) | 729 (setq elem (match-string-no-properties 1 string) |
726 modifier (match-string 2 string)) | 730 modifier (match-string-no-properties 2 string)) |
727 (if (string-match "|" elem) | 731 (if (string-match "|" elem) |
728 (setq elem (cons 'choice | 732 (setq elem (cons 'choice |
729 (mapcar 'xml-parse-elem-type | 733 (mapcar 'xml-parse-elem-type |
730 (split-string elem "|")))) | 734 (split-string elem "|")))) |
731 (if (string-match "," elem) | 735 (if (string-match "," elem) |
732 (setq elem (cons 'seq | 736 (setq elem (cons 'seq |
733 (mapcar 'xml-parse-elem-type | 737 (mapcar 'xml-parse-elem-type |
734 (split-string elem ","))))))) | 738 (split-string elem ","))))))) |
735 (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string) | 739 (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string) |
736 (setq elem (match-string 1 string) | 740 (setq elem (match-string-no-properties 1 string) |
737 modifier (match-string 2 string)))) | 741 modifier (match-string-no-properties 2 string)))) |
738 | 742 |
739 (if (and (stringp elem) (string= elem "#PCDATA")) | 743 (if (and (stringp elem) (string= elem "#PCDATA")) |
740 (setq elem 'pcdata)) | 744 (setq elem 'pcdata)) |
741 | 745 |
742 (cond | 746 (cond |
763 | 767 |
764 (let ((point 0) | 768 (let ((point 0) |
765 children end-point) | 769 children end-point) |
766 (while (string-match "&\\([^;]*\\);" string point) | 770 (while (string-match "&\\([^;]*\\);" string point) |
767 (setq end-point (match-end 0)) | 771 (setq end-point (match-end 0)) |
768 (let* ((this-part (match-string 1 string)) | 772 (let* ((this-part (match-string-no-properties 1 string)) |
769 (prev-part (substring string point (match-beginning 0))) | 773 (prev-part (substring string point (match-beginning 0))) |
770 (entity (assoc this-part xml-entity-alist)) | 774 (entity (assoc this-part xml-entity-alist)) |
771 (expansion | 775 (expansion |
772 (cond ((string-match "#\\([0-9]+\\)" this-part) | 776 (cond ((string-match "#\\([0-9]+\\)" this-part) |
773 (let ((c (decode-char | 777 (let ((c (decode-char |
774 'ucs | 778 'ucs |
775 (string-to-number (match-string 1 this-part))))) | 779 (string-to-number (match-string-no-properties 1 this-part))))) |
776 (if c (string c)))) | 780 (if c (string c)))) |
777 ((string-match "#x\\([[:xdigit:]]+\\)" this-part) | 781 ((string-match "#x\\([[:xdigit:]]+\\)" this-part) |
778 (let ((c (decode-char | 782 (let ((c (decode-char |
779 'ucs | 783 'ucs |
780 (string-to-number (match-string 1 this-part) 16)))) | 784 (string-to-number (match-string-no-properties 1 this-part) 16)))) |
781 (if c (string c)))) | 785 (if c (string c)))) |
782 (entity | 786 (entity |
783 (cdr entity)) | 787 (cdr entity)) |
784 ((eq (length this-part) 0) | 788 ((eq (length this-part) 0) |
785 (error "XML: (Not Well-Formed) No entity given")) | 789 (error "XML: (Not Well-Formed) No entity given")) |