# HG changeset patch
# User Paul Aurich
# Date 1244056156 0
# Node ID bf9db4c67679b866ff34e59a22617f46ead02aa2
# Parent 22bcf150f6c7640e2fd674e7bfb8596bcdda242d
Add purple_utf8_strip_unprintables and use it on outgoing XMPP messages.

We will no longer send messages which contain characters considered invalid
in XML 1.0 (e.g. ASCII control characters other than tab, newline and
carriage return).

Closes #5768.

diff -r 22bcf150f6c7 -r bf9db4c67679 ChangeLog
--- a/ChangeLog	Wed Jun 03 17:09:18 2009 +0000
+++ b/ChangeLog	Wed Jun 03 19:09:16 2009 +0000
@@ -41,6 +41,8 @@
 	* /affiliate and /role will now list the room members with the specified
 	  affiliation/role if possible. (Andrei Mozzhuhin)
 	* Put section breaks between resources in "Get Info" to improve readability.
+	* Silently remove invalid XML 1.0 entities (e.g. ASCII control characters)
+	  from sent messages.
 	* XHTML markup is only included in outgoing messages when the message
 	  contains formatting.
 	* Show when the user was last logged in when doing "Get Info" on an offline
diff -r 22bcf150f6c7 -r bf9db4c67679 ChangeLog.API
--- a/ChangeLog.API	Wed Jun 03 17:09:18 2009 +0000
+++ b/ChangeLog.API	Wed Jun 03 19:09:16 2009 +0000
@@ -49,6 +49,7 @@
 		* purple_request_field_get_ui_data
 		* purple_request_field_set_ui_data
 		* purple_strequal
+		* purple_utf8_strip_unprintables
 		* xmlnode_from_file
 		* xmlnode_get_parent
 		* xmlnode_set_attrib_full
diff -r 22bcf150f6c7 -r bf9db4c67679 libpurple/protocols/jabber/message.c
--- a/libpurple/protocols/jabber/message.c	Wed Jun 03 17:09:18 2009 +0000
+++ b/libpurple/protocols/jabber/message.c	Wed Jun 03 19:09:16 2009 +0000
@@ -1190,7 +1190,9 @@
 		jm->typing_style |= JM_TS_JEP_0022;
 	}
 
-	purple_markup_html_to_xhtml(msg, &xhtml, &jm->body);
+	tmp = purple_utf8_strip_unprintables(msg);
+	purple_markup_html_to_xhtml(tmp, &xhtml, &jm->body);
+	g_free(tmp);
 	tmp = jabber_message_smileyfy_xhtml(jm, xhtml);
 	if (tmp) {
 		g_free(xhtml);
@@ -1231,7 +1233,9 @@
 	jm->to = g_strdup_printf("%s@%s", chat->room, chat->server);
 	jm->id = jabber_get_next_id(jm->js);
 
-	purple_markup_html_to_xhtml(msg, &xhtml, &jm->body);
+	tmp = purple_utf8_strip_unprintables(msg);
+	purple_markup_html_to_xhtml(tmp, &xhtml, &jm->body);
+	g_free(tmp);
 	tmp = jabber_message_smileyfy_xhtml(jm, xhtml);
 	if (tmp) {
 		g_free(xhtml);
diff -r 22bcf150f6c7 -r bf9db4c67679 libpurple/util.c
--- a/libpurple/util.c	Wed Jun 03 17:09:18 2009 +0000
+++ b/libpurple/util.c	Wed Jun 03 19:09:16 2009 +0000
@@ -4424,6 +4424,45 @@
 	return g_string_free(workstr, FALSE);
 }
 
+gchar *
+purple_utf8_strip_unprintables(const gchar *str)
+{
+	gchar *workstr, *iter;
+
+	g_return_val_if_fail(str != NULL, NULL);
+	g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL);
+
+	/* Characters are only ever dropped, so the input length suffices. */
+	workstr = iter = g_new(gchar, strlen(str) + 1);
+	while (*str) {
+		gunichar c = g_utf8_get_char(str);
+		const gchar *next = g_utf8_next_char(str);
+		size_t len = next - str;
+
+		/*
+		 * Keep exactly what XML 1.0 allows:
+		 *   Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
+		 *            [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+		 * g_unichar_isprint() is too strict for this: it also rejects
+		 * tab, newline and carriage return, which are valid XML.
+		 */
+		if (c == '\t' || c == '\n' || c == '\r' ||
+		    (c >= 0x20 && c <= 0xD7FF) ||
+		    (c >= 0xE000 && c <= 0xFFFD) ||
+		    (c >= 0x10000 && c <= 0x10FFFF)) {
+			memcpy(iter, str, len);
+			iter += len;
+		}
+
+		str = next;
+	}
+
+	/* nul-terminate the new string */
+	*iter = '\0';
+
+	return workstr;
+}
+
 /*
  * This function is copied from g_strerror() but changed to use
  * gai_strerror().
diff -r 22bcf150f6c7 -r bf9db4c67679 libpurple/util.h
--- a/libpurple/util.h	Wed Jun 03 17:09:18 2009 +0000
+++ b/libpurple/util.h	Wed Jun 03 19:09:16 2009 +0000
@@ -1248,6 +1248,21 @@
 gchar *purple_utf8_salvage(const char *str);
 
 /**
+ * Removes characters that are not allowed in XML 1.0 from a UTF-8
+ * string. Such characters (in particular most low-ASCII control
+ * characters) are not allowed in XMPP and are rejected by libxml2 by
+ * default. Every code point matching the XML 1.0 "Char" production is
+ * kept: tab, newline, carriage return, U+0020-U+D7FF, U+E000-U+FFFD and
+ * U+10000-U+10FFFF. The returned string must be freed by the caller.
+ *
+ * @param str A valid UTF-8 string.
+ *
+ * @return A newly allocated UTF-8 string without the invalid characters,
+ *         or @c NULL if @a str is @c NULL or not valid UTF-8.
+ */
+gchar *purple_utf8_strip_unprintables(const gchar *str);
+
+/**
  * Return the UTF-8 version of gai_strerror(). It calls gai_strerror()
  * then converts the result to UTF-8. This function is analogous to
  * g_strerror().