# HG changeset patch # User Juanma Barranquero # Date 1047811614 0 # Node ID ff1b1d15e1f233d73c15098e9e7c845143854ed5 # Parent 93622ce43bddb47c81178ae69c5d2992d58952a0 (xml-ucs-to-string): New function to convert Unicode codepoints to strings. Uses decode-char (mule.el) if available. (xml-parse-tag, xml-parse-attlist, xml-skip-dtd, xml-parse-dtd, xml-parse-elem-type): Use ' \t\n\r' instead of '[:space:]'. (xml-parse-attlist): Added attribute normalization. (xml-parse-tag): Replace "\r\n" and "\r" with "\n". diff -r 93622ce43bdd -r ff1b1d15e1f2 lisp/xml.el --- a/lisp/xml.el Sun Mar 16 01:09:05 2003 +0000 +++ b/lisp/xml.el Sun Mar 16 10:46:54 2003 +0000 @@ -184,7 +184,7 @@ ;; beginning of a document) ((looking-at "<\\?") (search-forward "?>" end) - (goto-char (- (re-search-forward "[^[:space:]]") 1)) + (goto-char (- (re-search-forward "[^ \t\n\r]") 1)) (xml-parse-tag end)) ;; Character data (CDATA) sections, in which no tag should be interpreted ((looking-at "[:space:]]+\\)") + ((looking-at "<\\([^/> \t\n\r]+\\)") (goto-char (match-end 1)) (let* ((case-fold-search nil) ;; XML is case-sensitive. (node-name (match-string 1)) @@ -219,7 +219,7 @@ pos) ;; is this an empty element ? - (if (looking-at "/[[:space:]]*>") + (if (looking-at "/[ \t\n\r]*>") (progn (forward-char 2) (nreverse (cons '("") children))) @@ -230,7 +230,7 @@ (forward-char 1) ;; Now check that we have the right end-tag. Note that this ;; one might contain spaces after the tag name - (while (not (looking-at (concat ""))) + (while (not (looking-at (concat ""))) (cond ((looking-at " (point) end) - (error "XML: end of attribute list not found before end of region")) - ) + (error "XML: end of attribute list not found before end of region"))) (nreverse attlist))) ;;******************************************************************* @@ -318,15 +336,15 @@ The point must be just before the starting tag of the DTD. This follows the rule [28] in the XML specifications." (forward-char (length "") + (if (looking-at "[ \t\n\r]*>") (error "XML: invalid DTD (excepting name of the document)")) (condition-case nil (progn - (forward-word 1) ;; name of the document - (goto-char (- (re-search-forward "[[:space:]]") 1)) - (goto-char (- (re-search-forward "[^[:space:]]") 1)) + (forward-word 1) + (goto-char (- (re-search-forward "[ \t\n\r]") 1)) + (goto-char (- (re-search-forward "[^ \t\n\r]") 1)) (if (looking-at "\\[") - (re-search-forward "\\][[:space:]]*>" end) + (re-search-forward "\\][ \t\n\r]*>" end) (search-forward ">" end))) (error (error "XML: No end to the DTD")))) @@ -334,7 +352,7 @@ "Parse the DTD that point is looking at. The DTD must end before the position END in the current buffer." (forward-char (length "") (error "XML: invalid DTD (excepting name of the document)")) @@ -344,24 +362,24 @@ type element end-pos) (goto-char (match-end 0)) - (goto-char (- (re-search-forward "[^[:space:]]") 1)) + (goto-char (- (re-search-forward "[^ \t\n\r]") 1)) - ;; External DTDs => don't know how to handle them yet + ;; External DTDs => don't know how to handle them yet (if (looking-at "SYSTEM") (error "XML: Don't know how to handle external DTDs")) (if (not (= (char-after) ?\[)) (error "XML: Unknown declaration in the DTD")) - ;; Parse the rest of the DTD + ;; Parse the rest of the DTD (forward-char 1) - (while (and (not (looking-at "[[:space:]]*\\]")) + (while (and (not (looking-at "[ \t\n\r]*\\]")) (<= (point) end)) (cond ;; Translation of rule [45] of XML specifications ((looking-at - "[[:space:]]*]+\\)>") + "[ \t\n\r]*]+\\)>") (setq element (intern (match-string-no-properties 1)) type (match-string-no-properties 2)) @@ -369,13 +387,13 @@ ;; Translation of rule [46] of XML specifications (cond - ((string-match "^EMPTY[[:space:]]*$" type) ;; empty declaration + ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration (setq type 'empty)) - ((string-match "^ANY[[:space:]]*$" type) ;; any type of contents + ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents (setq type 'any)) - ((string-match "^(\\(.*\\))[[:space:]]*$" type) ;; children ([47]) + ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) - ((string-match "^%[^;]+;[[:space:]]*$" type) ;; substitution + ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution nil) (t (error "XML: Invalid element type in the DTD"))) @@ -417,8 +435,8 @@ (mapcar 'xml-parse-elem-type (split-string elem ",")))) ))) - (if (string-match "[[:space:]]*\\([^+*?]+\\)\\([+*?]?\\)" string) - (setq elem (match-string 1 string) + (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string) + (setq elem (match-string 1 string) modifier (match-string 2 string)))) (if (and (stringp elem) (string= elem "#PCDATA")) @@ -434,6 +452,22 @@ (t elem)))) +;;******************************************************************* +;;** +;;** Converting code points to strings +;;** +;;******************************************************************* + +(defun xml-ucs-to-string (codepoint) + "Return a string representation of CODEPOINT. If it can't be +converted, return '?'." + (cond ((boundp 'decode-char) + (char-to-string (decode-char 'ucs codepoint))) + ((and (< codepoint 128) + (> codepoint 31)) + (char-to-string codepoint)) + (t "?"))) ; FIXME: There's gotta be a better way to + ; designate an unknown character. ;;******************************************************************* ;;** @@ -451,6 +485,18 @@ (setq string (replace-match "'" t nil string))) (while (string-match """ string) (setq string (replace-match "\"" t nil string))) + (while (string-match "&#\\([0-9]+\\);" string) + (setq string (replace-match (xml-ucs-to-string + (string-to-number + (match-string-no-properties 1 string))) + t nil string))) + (while (string-match "&#x\\([0-9a-fA-F]+\\);" string) + (setq string (replace-match (xml-ucs-to-string + (string-to-number + (match-string-no-properties 1 string) + 16)) + t nil string))) + ;; This goes last so it doesn't confuse the matches above. (while (string-match "&" string) (setq string (replace-match "&" t nil string)))