changeset 12620:13599d978a31

[gaim-migrate @ 14956] SF Patch #1387727 from Dennis Lambe Jr. (malsyned) "When gaim_markup_strip_html removes HTML tags from log messages before writing them out, the contents of the href attribute of <a> tags get lost, even though that information is much more valuable than HTML formatting information and would be very useful to have in logs. This patch causes gaim_markup_strip_html, when confronted with a chunk of text that looks like this: Check out <a href="http://gaim.sf.net/">this great program</a> I just found! to emit something like this: Check out this great program<http://gaim.sf.net/> I just found!" committer: Tailor Script <tailor@pidgin.im>
author Richard Laager <rlaager@wiktel.com>
date Thu, 22 Dec 2005 04:37:10 +0000
parents dc995f73c101
children 8aa0a62cf080
files src/util.c
diffstat 1 files changed, 67 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/src/util.c	Thu Dec 22 04:21:30 2005 +0000
+++ b/src/util.c	Thu Dec 22 04:37:10 2005 +0000
@@ -1254,6 +1254,8 @@
 	gboolean closing_td_p = FALSE;
 	gchar *str2;
 	const gchar *cdata_close_tag = NULL;
+	gchar *href = NULL;
+	int href_st = 0;
 
 	if(!str)
 		return NULL;
@@ -1306,8 +1308,70 @@
 					k++;
 				}
 
+				/* If we've got an <a> tag with an href, save the address
+				 * to print later. */
+				if (strncasecmp(str2 + i, "<a", 2) == 0 &&
+				    g_ascii_isspace(str2[i+2]))
+				{
+					int st; /* start of href, inclusive [ */
+					int end; /* end of href, exclusive ) */
+					char delim = ' ';
+					/* Find start of href */
+					for (st = i + 3; st < k; st++)
+					{
+						if (strncasecmp(str2+st, "href=", 5) == 0)
+						{
+							st += 5;
+							if (str2[st] == '"')
+							{
+								delim = '"';
+								st++;
+							}
+							break;
+						}
+					}
+					/* find end of address */
+					for (end = st; end < k && str2[end] != delim; end++)
+					{
+						/* All the work is done in the loop construct above. */
+					}
+
+					/* If there's an address, save it.  If there was
+					 * already one saved, kill it. */
+					if (st < k)
+					{
+						char *tmp;
+						g_free(href);
+						tmp = g_strndup(str2 + st, end - st);
+						href = gaim_unescape_html(tmp);
+						g_free(tmp);
+						href_st = j;
+					}
+				}
+
+				/* Replace </a> with an ascii representation of the
+				 * address the link was pointing to. */
+				else if (href != NULL && strncasecmp(str2 + i, "</a>", 4) == 0)
+				{
+
+					size_t hrlen = strlen(href);
+
+					/* Only insert the href if it's different from the CDATA. */
+					if (hrlen != j - href_st  ||
+					    strncmp(str2 + href_st, href, hrlen))
+					{
+						str2[j++] = ' ';
+						str2[j++] = '<';
+						g_memmove(str2 + j, href, hrlen);
+						j += hrlen;
+						str2[j++] = '>';
+						g_free(href);
+						href = NULL;
+					}
+				}
+
 				/* Check for tags which should be mapped to newline */
-				if (strncasecmp(str2 + i, "<p>", 3) == 0
+				else if (strncasecmp(str2 + i, "<p>", 3) == 0
 				 || strncasecmp(str2 + i, "<tr", 3) == 0
 				 || strncasecmp(str2 + i, "<br", 3) == 0
 				 || strncasecmp(str2 + i, "<li", 3) == 0
@@ -1388,6 +1452,8 @@
 			str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
 	}
 
+	g_free(href);
+
 	str2[j] = '\0';
 
 	return str2;