# HG changeset patch # User Paul Aurich # Date 1247865085 0 # Node ID e5fdfff98aa9e9e1104d7e640f064a84e810d4f6 # Parent 53c6b8d95ea573b78b9210e409a6d7d494386455 When GNU Libidn is available, use it for XMPP stringprep operations. I made configure fail if libidn is unavailable and force_deps is set because glib's UTF-8 strdown and casefold operations fail one of the tests I've updated (based on running the tests with libidn). Running without libidn will still work in almost every case because people use all-ASCII JabberIDs and I had to search a fair amount to find characters for which GLib failed. This shouldn't have a performance impact on top of Mark's optimizations for all-ASCII JIDs. diff -r 53c6b8d95ea5 -r e5fdfff98aa9 ChangeLog --- a/ChangeLog Fri Jul 17 02:19:31 2009 +0000 +++ b/ChangeLog Fri Jul 17 21:11:25 2009 +0000 @@ -105,6 +105,9 @@ markup if they support it. * Removed support for obsoleted XEP-0022 (Message Events) and XEP-0091 (Legacy Entity Time). + * When the GNU IDN library (libidn) is available, it is used for + normalization of Jabber IDs. When unavailable, internal routines are + used (as in previous versions). Yahoo!/Yahoo! JAPAN: * P2P file transfers. (Sulabh Mahajan) diff -r 53c6b8d95ea5 -r e5fdfff98aa9 configure.ac --- a/configure.ac Fri Jul 17 02:19:31 2009 +0000 +++ b/configure.ac Fri Jul 17 21:11:25 2009 +0000 @@ -808,6 +808,25 @@ fi fi +AC_ARG_ENABLE(idn, + [AC_HELP_STRING([--disable-idn], [compile without IDN support])], + [enable_idn="$enableval" force_idn=$enableval], [enable_idn="yes" force_idn=no]) +if test "x$enable_idn" != "xno"; then + PKG_CHECK_MODULES(IDN, libidn >= 0.0.0, [ + AC_DEFINE(USE_IDN, 1, [Use GNU Libidn for stringprep and IDN]) + AC_SUBST(IDN_CFLAGS) + AC_SUBST(IDN_LIBS) + ], [ + AC_MSG_RESULT(no) + if test "x$force_deps" = "xyes" ; then + AC_MSG_ERROR([ +GNU Libidn development headers not found. +Use --disable-idn if you do not need it. 
+])
+		fi
+	])
+fi
+
 dnl #######################################################################
 dnl # Check for Meanwhile headers (for Sametime)
 dnl #######################################################################
@@ -2546,6 +2565,7 @@
 if test "x$enable_dbus" = "xyes" ; then
 	eval eval echo D-Bus services directory...... : $DBUS_SERVICES_DIR
 fi
+echo Build with GNU Libidn......... : $enable_idn
 echo Build with NetworkManager..... : $enable_nm
 echo SSL Library/Libraries......... : $msg_ssl
 if test "x$SSL_CERTIFICATES_DIR" != "x" ; then
diff -r 53c6b8d95ea5 -r e5fdfff98aa9 libpurple/protocols/jabber/Makefile.am
--- a/libpurple/protocols/jabber/Makefile.am	Fri Jul 17 02:19:31 2009 +0000
+++ b/libpurple/protocols/jabber/Makefile.am	Fri Jul 17 21:11:25 2009 +0000
@@ -88,7 +88,7 @@
 st =
 pkg_LTLIBRARIES = libjabber.la libxmpp.la
 libjabber_la_SOURCES = $(JABBERSOURCES)
-libjabber_la_LIBADD = $(GLIB_LIBS) $(SASL_LIBS) $(LIBXML_LIBS)
+libjabber_la_LIBADD = $(GLIB_LIBS) $(SASL_LIBS) $(LIBXML_LIBS) $(IDN_LIBS)
 
 libxmpp_la_SOURCES = libxmpp.c
 libxmpp_la_LIBADD = libjabber.la
@@ -100,4 +100,5 @@
 	-I$(top_builddir)/libpurple \
 	$(DEBUG_CFLAGS) \
 	$(GLIB_CFLAGS) \
+	$(IDN_CFLAGS) \
 	$(LIBXML_CFLAGS)
diff -r 53c6b8d95ea5 -r e5fdfff98aa9 libpurple/protocols/jabber/jutil.c
--- a/libpurple/protocols/jabber/jutil.c	Fri Jul 17 02:19:31 2009 +0000
+++ b/libpurple/protocols/jabber/jutil.c	Fri Jul 17 21:11:25 2009 +0000
@@ -31,9 +31,165 @@
 #include "presence.h"
 #include "jutil.h"
 
+#ifdef USE_IDN
+#include <idna.h>
+#include <stringprep.h>
+static char idn_buffer[1024];
+#endif
+
+/* ToASCII a domain name for DNS use; returns a new string or NULL on error */
+gchar *jabber_try_idna_to_ascii(const char *input)
+{
+#ifndef USE_IDN
+	return g_strdup(input);
+#else
+	gchar *out;
+	char *tmp;
+
+	g_return_val_if_fail(input != NULL, NULL);
+	g_return_val_if_fail(*input != '\0', NULL);
+
+	if (idna_to_ascii_8z(input, &tmp, IDNA_USE_STD3_ASCII_RULES) != IDNA_SUCCESS)
+		return NULL;
+
+	out = g_strdup(tmp);
+	/* This *MUST* be freed with free, not g_free */
+	free(tmp);
+	return out;
+#endif
+}
+
+#ifdef USE_IDN
+static gboolean jabber_nodeprep(char *str, size_t buflen)
+{
+	return stringprep_xmpp_nodeprep(str, buflen) == STRINGPREP_OK;
+}
+
+static gboolean jabber_resourceprep(char *str, size_t buflen)
+{
+	return stringprep_xmpp_resourceprep(str, buflen) == STRINGPREP_OK;
+}
+
+/*
+ * Normalize the JID in str with libidn and return it as a new JabberID.
+ *
+ * at and slash point at the '@' and '/' separators within str (NULL when
+ * absent); null points at the terminating NUL of str.
+ *
+ * Returns NULL if a part is longer than 1023 bytes or fails validation.
+ * On success, release the result with jabber_id_free().
+ */
+static JabberID*
+jabber_idn_validate(const char *str, const char *at, const char *slash,
+                    const char *null)
+{
+	const char *node = NULL;
+	const char *domain = NULL;
+	const char *resource = NULL;
+	int node_len = 0;
+	int domain_len = 0;
+	int resource_len = 0;
+	char *ascii;
+	JabberID *jid;
+
+	if (at) {
+		node = str;
+		node_len = at - str;
+
+		domain = at + 1;
+		if (slash) {
+			domain_len = slash - (at + 1);
+			resource = slash + 1;
+			resource_len = null - (slash + 1);
+		} else {
+			domain_len = null - (at + 1);
+		}
+	} else {
+		domain = str;
+
+		if (slash) {
+			domain_len = slash - str;
+			/* The resource starts after the '/', not on it */
+			resource = slash + 1;
+			resource_len = null - (slash + 1);
+		} else {
+			/* The whole string is the domain */
+			domain_len = null - str;
+		}
+	}
+
+	/* Ensure no parts are > 1023 bytes, so each one fits in idn_buffer */
+	if (node_len > 1023 || domain_len > 1023 || resource_len > 1023)
+		return NULL;
+
+	jid = g_new0(JabberID, 1);
+
+	if (node) {
+		strncpy(idn_buffer, node, node_len);
+		idn_buffer[node_len] = '\0';
+
+		if (!jabber_nodeprep(idn_buffer, sizeof(idn_buffer)))
+			goto fail;
+
+		jid->node = g_strdup(idn_buffer);
+	}
+
+	/* domain *must* be here */
+	strncpy(idn_buffer, domain, domain_len);
+	idn_buffer[domain_len] = '\0';
+	if (domain[0] == '[') { /* IPv6 address */
+		gboolean valid = FALSE;
+
+		if (idn_buffer[domain_len - 1] == ']') {
+			idn_buffer[domain_len - 1] = '\0';
+			valid = purple_ipv6_address_is_valid(idn_buffer + 1);
+		}
+
+		if (!valid)
+			goto fail;
+
+		/* idn_buffer lost its ']' above, so copy the literal from the input */
+		jid->domain = g_strndup(domain, domain_len);
+	} else {
+		/* Apply nameprep */
+		if (stringprep_nameprep(idn_buffer, sizeof(idn_buffer)) != STRINGPREP_OK)
+			goto fail;
+
+		/* And now ToASCII; only whether it succeeds matters here */
+		if (idna_to_ascii_8z(idn_buffer, &ascii, IDNA_USE_STD3_ASCII_RULES) != IDNA_SUCCESS)
+			goto fail;
+
+		/* This *MUST* be freed using 'free', not 'g_free' */
+		free(ascii);
+		jid->domain = g_strdup(idn_buffer);
+	}
+
+	if (resource) {
+		strncpy(idn_buffer, resource, resource_len);
+		idn_buffer[resource_len] = '\0';
+
+		if (!jabber_resourceprep(idn_buffer, sizeof(idn_buffer)))
+			goto fail;
+
+		jid->resource = g_strdup(idn_buffer);
+	}
+
+	return jid;
+
+fail:
+	jabber_id_free(jid);
+	return NULL;
+}
+
+#endif /* USE_IDN */
+
 gboolean jabber_nodeprep_validate(const char *str)
 {
+#ifdef USE_IDN
+	gboolean result;
+#else
 	const char *c;
+#endif
 
 	if(!str)
 		return TRUE;
@@ -41,6 +197,12 @@
 	if(strlen(str) > 1023)
 		return FALSE;
 
+#ifdef USE_IDN
+	strncpy(idn_buffer, str, sizeof(idn_buffer) - 1);
+	idn_buffer[sizeof(idn_buffer) - 1] = '\0';
+	result = jabber_nodeprep(idn_buffer, sizeof(idn_buffer));
+	return result;
+#else /* USE_IDN */
 	c = str;
 	while(c && *c) {
 		gunichar ch = g_utf8_get_char(c);
@@ -52,6 +214,7 @@
 	}
 
 	return TRUE;
+#endif /* USE_IDN */
 }
 
 gboolean jabber_domain_validate(const char *str)
@@ -101,7 +264,11 @@
 
 gboolean jabber_resourceprep_validate(const char *str)
 {
+#ifdef USE_IDN
+	gboolean result;
+#else
 	const char *c;
+#endif
 
 	if(!str)
 		return TRUE;
@@ -109,6 +276,12 @@
 	if(strlen(str) > 1023)
 		return FALSE;
 
+#ifdef USE_IDN
+	strncpy(idn_buffer, str, sizeof(idn_buffer) - 1);
+	idn_buffer[sizeof(idn_buffer) - 1] = '\0';
+	result = jabber_resourceprep(idn_buffer, sizeof(idn_buffer));
+	return result;
+#else /* USE_IDN */
 	c = str;
 	while(c && *c) {
 		gunichar ch = g_utf8_get_char(c);
@@ -119,9 +292,9 @@
 	}
 
 	return TRUE;
+#endif /* USE_IDN */
 }
 
-
 JabberID*
 jabber_id_new(const char *str)
 {
@@ -132,8 +305,10 @@
 #if 0
 	gboolean node_is_required = FALSE;
 #endif
+#ifndef USE_IDN
 	char *node = NULL;
 	char *domain;
+#endif
 	JabberID *jid;
 
 	if (!str)
@@ -253,23 +428,27 @@
 	if (!g_utf8_validate(str, -1, NULL))
 		return NULL;
 
+#ifdef USE_IDN
+	return
jabber_idn_validate(str, at, slash, c /* points to the null */); +#else /* USE_IDN */ + jid = g_new0(JabberID, 1); /* normalization */ if(at) { - node = g_utf8_strdown(str, at-str); + node = g_utf8_casefold(str, at-str); if(slash) { - domain = g_utf8_strdown(at+1, slash-(at+1)); + domain = g_utf8_casefold(at+1, slash-(at+1)); jid->resource = g_utf8_normalize(slash+1, -1, G_NORMALIZE_NFKC); } else { - domain = g_utf8_strdown(at+1, -1); + domain = g_utf8_casefold(at+1, -1); } } else { if(slash) { - domain = g_utf8_strdown(str, slash-str); + domain = g_utf8_casefold(str, slash-str); jid->resource = g_utf8_normalize(slash+1, -1, G_NORMALIZE_NFKC); } else { - domain = g_utf8_strdown(str, -1); + domain = g_utf8_casefold(str, -1); } } @@ -292,6 +471,7 @@ } return jid; +#endif /* USE_IDN */ } void diff -r 53c6b8d95ea5 -r e5fdfff98aa9 libpurple/protocols/jabber/jutil.h --- a/libpurple/protocols/jabber/jutil.h Fri Jul 17 02:19:31 2009 +0000 +++ b/libpurple/protocols/jabber/jutil.h Fri Jul 17 21:11:25 2009 +0000 @@ -44,6 +44,11 @@ /* Returns true if JID is the bare JID of our account. */ gboolean jabber_is_own_account(JabberStream *js, const char *jid); +/* Try to convert an IDNA domain name to something we can pass to a DNS lookup. + * If IDN support is not available, returns a copy of the input string. 
+ */ +gchar *jabber_try_idna_to_ascii(const gchar *input); + gboolean jabber_nodeprep_validate(const char *); gboolean jabber_domain_validate(const char *); gboolean jabber_resourceprep_validate(const char *); diff -r 53c6b8d95ea5 -r e5fdfff98aa9 libpurple/tests/test_jabber_jutil.c --- a/libpurple/tests/test_jabber_jutil.c Fri Jul 17 02:19:31 2009 +0000 +++ b/libpurple/tests/test_jabber_jutil.c Fri Jul 17 21:11:25 2009 +0000 @@ -44,6 +44,10 @@ longnode = g_strnfill(1023, 'a'); fail_unless(jabber_nodeprep_validate(longnode)); g_free(longnode); + + longnode = g_strnfill(1024, 'a'); + fail_if(jabber_nodeprep_validate(longnode)); + g_free(longnode); } END_TEST @@ -132,7 +136,19 @@ /* Ensure that jabber_id_new is properly lowercasing node and domains */ assert_jid_parts("paul", "darkrain42.org", "PaUL@darkrain42.org"); assert_jid_parts("paul", "darkrain42.org", "paul@DaRkRaIn42.org"); - assert_jid_parts("ꙥ", "darkrain42.org", "Ꙥ@darkrain42.org"); + + /* These case-mapping tests culled from examining RFC3454 B.2 */ + + /* Cyrillic capital EF (U+0424) maps to lowercase EF (U+0444) */ + assert_jid_parts("ф", "darkrain42.org", "Ф@darkrain42.org"); + /* + * These character (U+A664 and U+A665) are not mapped to anything in + * RFC3454 B.2. This first test *fails* when not using IDN because glib's + * case-folding/utf8_strdown improperly lowercases the character. + */ + assert_jid_parts("Ꙥ", "darkrain42.org", "Ꙥ@darkrain42.org"); + assert_jid_parts("ꙥ", "darkrain42.org", "ꙥ@darkrain42.org"); + /* U+04E9 to U+04E9 */ assert_jid_parts("paul", "өarkrain42.org", "paul@Өarkrain42.org"); } END_TEST