# HG changeset patch
# User Paul Aurich
# Date 1272561420 0
# Node ID 5bac51b394e67ca4ba1a5593fc636169fd41fab9
# Parent 2a436e0ce97714eee2ffbeae5055e8c808e124e0
util: Better validation of the allowed character values in XML 1.0

From http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char. Refs #11257

This doesn't actually make a difference, because I think all the
invalid ranges aren't valid UTF-8 and so g_utf8_validate catches them.

diff -r 2a436e0ce977 -r 5bac51b394e6 libpurple/tests/test_util.c
--- a/libpurple/tests/test_util.c	Thu Apr 29 05:58:02 2010 +0000
+++ b/libpurple/tests/test_util.c	Thu Apr 29 17:17:00 2010 +0000
@@ -121,6 +121,33 @@
 }
 END_TEST
 
+START_TEST(test_utf8_strip_unprintables)
+{
+	fail_unless(NULL == purple_utf8_strip_unprintables(NULL));
+	/* invalid UTF-8 */
+#if 0
+	/* disabled because make check fails on an assertion */
+	fail_unless(NULL == purple_utf8_strip_unprintables("abc\x80\x7f"));
+#endif
+	/* \t, \n, \r, space */
+	assert_string_equal_free("ab \tcd\nef\r ", purple_utf8_strip_unprintables("ab \tcd\nef\r "));
+	/* Basic ASCII */
+	assert_string_equal_free("Foobar", purple_utf8_strip_unprintables("Foobar"));
+	/* 0xE000 - 0xFFFD (UTF-8 encoded) */
+	/* U+F1F7 */
+	assert_string_equal_free("aaaa\xef\x87\xb7", purple_utf8_strip_unprintables("aaaa\xef\x87\xb7"));
+#if 0
+	/* disabled because make check fails on an assertion */
+	/* U+DB80 (Private Use High Surrogate, First) -- should be stripped */
+	assert_string_equal_free("aaaa", purple_utf8_strip_unprintables("aaaa\xed\xa0\x80"));
+	/* U+FFFE (should be stripped) */
+	assert_string_equal_free("aaaa", purple_utf8_strip_unprintables("aaaa\xef\xbf\xbe"));
+#endif
+	/* U+FEFF (should not be stripped) */
+	assert_string_equal_free("aaaa\xef\xbb\xbf", purple_utf8_strip_unprintables("aaaa\xef\xbb\xbf"));
+}
+END_TEST
+
 START_TEST(test_mime_decode_field)
 {
 	gchar *result = purple_mime_decode_field("=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=");
@@ -168,6 +195,10 @@
 	tcase_add_test(tc, test_markup_html_to_xhtml);
 	suite_add_tcase(s, tc);
 
+	tc = tcase_create("Stripping Unparseables");
+	tcase_add_test(tc, test_utf8_strip_unprintables);
+	suite_add_tcase(s, tc);
+
 	tc = tcase_create("MIME");
 	tcase_add_test(tc, test_mime_decode_field);
 	suite_add_tcase(s, tc);
diff -r 2a436e0ce977 -r 5bac51b394e6 libpurple/util.c
--- a/libpurple/util.c	Thu Apr 29 05:58:02 2010 +0000
+++ b/libpurple/util.c	Thu Apr 29 17:17:00 2010 +0000
@@ -4593,12 +4593,22 @@
 	}
 
 	workstr = iter = g_new(gchar, strlen(str) + 1);
-	for ( ; *str; ++str) {
-		guchar c = *str;
-		if (c >= 0x20 || c == '\t' || c == '\n' || c == '\r') {
-			*iter = c;
-			++iter;
+	while (*str) {
+		gunichar ch = g_utf8_get_char(str);
+		gchar *next = g_utf8_next_char(str);
+		/*
+		 * Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
+		 * [#x10000-#x10FFFF]
+		 */
+		if ((ch == '\t' || ch == '\n' || ch == '\r') ||
+				(ch >= 0x20 && ch <= 0xD7FF) ||
+				(ch >= 0xE000 && ch <= 0xFFFD) ||
+				(ch >= 0x10000 && ch <= 0x10FFFF)) {
+			memcpy(iter, str, next - str);
+			iter += (next - str);
 		}
+
+		str = next;
 	}
 
 	/* nul-terminate the new string */