changeset 111835:05fc128a856f

Return CDATA sections (like <style>foo</style>) as text nodes. Also ignore blank HTML nodes.
author Lars Magne Ingebrigtsen <larsi@gnus.org>
date Mon, 06 Dec 2010 17:59:52 +0100
parents 6951f234b0c3
children c92a6fe1fa59
files src/ChangeLog src/xml.c
diffstat 2 files changed, 9 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Mon Dec 06 11:51:37 2010 -0500
+++ b/src/ChangeLog	Mon Dec 06 17:59:52 2010 +0100
@@ -1,3 +1,9 @@
+2010-12-06  Lars Magne Ingebrigtsen  <larsi@gnus.org>
+
+	* xml.c (parse_region): Ignore blank HTML nodes.
+	(make_dom): Return CDATA sections (like <style>foo</style>) as
+	text nodes.
+
 2010-12-06  Stefan Monnier  <monnier@iro.umontreal.ca>
 
 	* lread.c (read1): Allow newstyle unquote outside of backquote.
--- a/src/xml.c	Mon Dec 06 11:51:37 2010 -0500
+++ b/src/xml.c	Mon Dec 06 17:59:52 2010 +0100
@@ -62,7 +62,7 @@
 
       return Fnreverse (result);
     }
-  else if (node->type == XML_TEXT_NODE)
+  else if (node->type == XML_TEXT_NODE || node->type == XML_CDATA_SECTION_NODE)
     {
       if (node->content)
 	return build_string (node->content);
@@ -105,7 +105,8 @@
     doc = htmlReadMemory (BYTE_POS_ADDR (CHAR_TO_BYTE (istart)),
 			  bytes, burl, "utf-8",
 			  HTML_PARSE_RECOVER|HTML_PARSE_NONET|
-			  HTML_PARSE_NOWARNING|HTML_PARSE_NOERROR);
+			  HTML_PARSE_NOWARNING|HTML_PARSE_NOERROR|
+			  HTML_PARSE_NOBLANKS);
   else
     doc = xmlReadMemory (BYTE_POS_ADDR (CHAR_TO_BYTE (istart)),
 			 bytes, burl, "utf-8",