changeset 9241:5e9018c81bd9

[gaim-migrate @ 10040] Wing says he improved gaim_markup_strip_html. This patch modifies gaim_markup_strip_html to: (1) remove <style>...</style> and <script>...</script> completely; (2) turn <td> into a tab instead of a space (this will be needed for the msn profile code). For (1) above, the </style> / </script> matching does not behave well when fed technically-correct-but-implausible input, such as </style > and </script >. committer: Tailor Script <tailor@pidgin.im>
author Tim Ringenbach <marv@pidgin.im>
date Wed, 09 Jun 2004 01:34:16 +0000
parents f1d87ab17e41
children fb517adf4972
files src/util.c
diffstat 1 files changed, 38 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/src/util.c	Wed Jun 09 01:24:47 2004 +0000
+++ b/src/util.c	Wed Jun 09 01:34:16 2004 +0000
@@ -1226,9 +1226,10 @@
 /* The following are probably reasonable changes:
  * - \n should be converted to a normal space
  * - in addition to <br>, <p> and <div> etc. should also be converted into \n
- * - We want to turn </td>#whitespace<td> sequences into a single blank
+ * - We want to turn </td>#whitespace<td> sequences into a single tab
+ * - We want to turn <td> into a single tab (for msn profile "parsing")
  * - We want to turn </tr>#whitespace<tr> sequences into a single \n
- * We should remove all <script>...</script> etc. This should be fixed some time
+ * - <script>...</script> and <style>...</style> should be completely removed
  */
 
 char *
@@ -1238,6 +1239,7 @@
 	gboolean visible = TRUE;
 	gboolean closing_td_p = FALSE;
 	gchar *str2;
+	const gchar *cdata_close_tag = NULL;
 
 	if(!str)
 		return NULL;
@@ -1248,9 +1250,20 @@
 	{
 		if (str2[i] == '<')
 		{
-			if (strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
+			if (cdata_close_tag)
 			{
-				str2[j++] = ' ';
+				/* Note: Don't even assume any other tag is a tag in CDATA */
+				if (strncasecmp(str2 + i, cdata_close_tag,
+						strlen(cdata_close_tag)) == 0)
+				{
+					i += strlen(cdata_close_tag) - 1;
+					cdata_close_tag = NULL;
+				}
+				continue;
+			}
+			else if (strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
+			{
+				str2[j++] = '\t';
 				visible = TRUE;
 			}
 			else if (strncasecmp(str2 + i, "</td>", 5) == 0)
@@ -1278,6 +1291,7 @@
 				{
 					k++;
 				}
+
 				/* Check for tags which should be mapped to newline */
 				if (strncasecmp(str2 + i, "<p>", 3) == 0
 				 || strncasecmp(str2 + i, "<tr", 3) == 0
@@ -1288,11 +1302,31 @@
 				{
 					str2[j++] = '\n';
 				}
+				/* Check for tags which begin CDATA and need to be closed */
+#if 0 /* FIXME: the OPTION end tag is optional; we can't handle this right now */
+				else if (strncasecmp(str2 + i, "<option", 7) == 0)
+				{
+					/* FIXME: We should not do this if the OPTION is SELECT'd */
+					cdata_close_tag = "</option>";
+				}
+#endif
+				else if (strncasecmp(str2 + i, "<script", 7) == 0)
+				{
+					cdata_close_tag = "</script>";
+				}
+				else if (strncasecmp(str2 + i, "<style", 6) == 0)
+				{
+					cdata_close_tag = "</style>";
+				}
 				/* Update the index and continue checking after the tag */
 				i = (str2[k] == '<')? k - 1: k;
 				continue;
 			}
 		}
+		else if (cdata_close_tag)
+		{
+			continue;
+		}
 		else if (!g_ascii_isspace(str2[i]))
 		{
 			visible = TRUE;