changeset 27101:bf9db4c67679

Add purple_utf8_strip_unprintables and use it on outgoing XMPP messages. We will no longer send messages which contain entities considered invalid in XML 1.0 (e.g. ASCII control characters). Closes #5768.
author Paul Aurich <paul@darkrain42.org>
date Wed, 03 Jun 2009 19:09:16 +0000
parents 22bcf150f6c7
children 2f297ab00e9d
files ChangeLog ChangeLog.API libpurple/protocols/jabber/message.c libpurple/util.c libpurple/util.h
diffstat 5 files changed, 51 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Wed Jun 03 17:09:18 2009 +0000
+++ b/ChangeLog	Wed Jun 03 19:09:16 2009 +0000
@@ -41,6 +41,8 @@
 	* /affiliate and /role will now list the room members with the specified
 	  affiliation/role if possible. (Andrei Mozzhuhin)
 	* Put section breaks between resources in "Get Info" to improve readability.
+	* Silently remove invalid XML 1.0 entities (e.g. ASCII control characters)
+	  from sent messages.
 	* XHTML markup is only included in outgoing messages when the message
 	  contains formatting.
 	* Show when the user was last logged in when doing "Get Info" on an offline
--- a/ChangeLog.API	Wed Jun 03 17:09:18 2009 +0000
+++ b/ChangeLog.API	Wed Jun 03 19:09:16 2009 +0000
@@ -49,6 +49,7 @@
 		* purple_request_field_get_ui_data
 		* purple_request_field_set_ui_data
 		* purple_strequal
+		* purple_utf8_strip_unprintables
 		* xmlnode_from_file
 		* xmlnode_get_parent
 		* xmlnode_set_attrib_full
--- a/libpurple/protocols/jabber/message.c	Wed Jun 03 17:09:18 2009 +0000
+++ b/libpurple/protocols/jabber/message.c	Wed Jun 03 19:09:16 2009 +0000
@@ -1190,7 +1190,9 @@
 			jm->typing_style |= JM_TS_JEP_0022;
 	}
 
-	purple_markup_html_to_xhtml(msg, &xhtml, &jm->body);
+	tmp = purple_utf8_strip_unprintables(msg);
+	purple_markup_html_to_xhtml(tmp, &xhtml, &jm->body);
+	g_free(tmp);
 	tmp = jabber_message_smileyfy_xhtml(jm, xhtml);
 	if (tmp) {
 		g_free(xhtml);
@@ -1231,7 +1233,9 @@
 	jm->to = g_strdup_printf("%s@%s", chat->room, chat->server);
 	jm->id = jabber_get_next_id(jm->js);
 
-	purple_markup_html_to_xhtml(msg, &xhtml, &jm->body);
+	tmp = purple_utf8_strip_unprintables(msg);
+	purple_markup_html_to_xhtml(tmp, &xhtml, &jm->body);
+	g_free(tmp);
 	tmp = jabber_message_smileyfy_xhtml(jm, xhtml);
 	if (tmp) {
 		g_free(xhtml);
--- a/libpurple/util.c	Wed Jun 03 17:09:18 2009 +0000
+++ b/libpurple/util.c	Wed Jun 03 19:09:16 2009 +0000
@@ -4424,6 +4424,34 @@
 	return g_string_free(workstr, FALSE);
 }
 
+/* Returns a newly allocated copy of str with unprintable characters removed
+ * (tab, LF and CR are kept); NULL if str is NULL or not valid UTF-8. */
+gchar *
+purple_utf8_strip_unprintables(const gchar *str)
+{
+	gchar *workstr, *iter;
+
+	g_return_val_if_fail(str != NULL, NULL);
+	g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL);
+
+	/* Characters are only ever dropped, so the input length is enough. */
+	workstr = iter = g_new(gchar, strlen(str) + 1);
+	while (*str) {
+		gunichar c = g_utf8_get_char(str);
+		const gchar *next = g_utf8_next_char(str);
+		size_t len = next - str;
+		/* g_unichar_isprint() is FALSE for every control character, but
+		 * tab, LF and CR are legal in XML 1.0 and must not be dropped. */
+		if (g_unichar_isprint(c) || c == '\t' || c == '\n' || c == '\r') {
+			memcpy(iter, str, len);
+			iter += len;
+		}
+		str = next;
+	}
+	*iter = '\0'; /* nul-terminate the new string */
+	return workstr;
+}
+
 /*
  * This function is copied from g_strerror() but changed to use
  * gai_strerror().
--- a/libpurple/util.h	Wed Jun 03 17:09:18 2009 +0000
+++ b/libpurple/util.h	Wed Jun 03 19:09:16 2009 +0000
@@ -1248,6 +1248,21 @@
 gchar *purple_utf8_salvage(const char *str);
 
 /**
+ * Removes unprintable characters from a UTF-8 string. These characters
+ * (in particular low-ASCII characters) are invalid in XML 1.0 and thus
+ * are not allowed in XMPP and are rejected by libxml2 by default. This
+ * function uses g_unichar_isprint to determine what characters should
+ * be stripped. The returned string must be freed by the caller.
+ *
+ * @param str A valid UTF-8 string.
+ *
+ * @return A newly allocated UTF-8 string without the unprintable characters.
+ *
+ * @see g_unichar_isprint
+ */
+gchar *purple_utf8_strip_unprintables(const gchar *str);
+
+/**
  * Return the UTF-8 version of gai_strerror().  It calls gai_strerror()
  * then converts the result to UTF-8.  This function is analogous to
  * g_strerror().