changeset 27674:e5fdfff98aa9

When GNU Libidn is available, use it for XMPP stringprep operations. I made configure fail if libidn is unavailable and force_deps is set because glib's UTF-8 strdown and casefold operations fail one of the tests I've updated (based on running the tests with libidn). Running without libidn will still work in almost every case because people use all-ASCII JabberIDs and I had to search a fair amount to find characters for which GLib failed. This shouldn't have a performance impact on top of Mark's optimizations for all-ASCII JIDs.
author Paul Aurich <paul@darkrain42.org>
date Fri, 17 Jul 2009 21:11:25 +0000
parents 53c6b8d95ea5
children 409ef6d76bf6
files ChangeLog configure.ac libpurple/protocols/jabber/Makefile.am libpurple/protocols/jabber/jutil.c libpurple/protocols/jabber/jutil.h libpurple/tests/test_jabber_jutil.c
diffstat 6 files changed, 233 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Fri Jul 17 02:19:31 2009 +0000
+++ b/ChangeLog	Fri Jul 17 21:11:25 2009 +0000
@@ -105,6 +105,9 @@
 	  markup if they support it.
 	* Removed support for obsoleted XEP-0022 (Message Events) and XEP-0091
 	  (Legacy Entity Time).
+	* When the GNU IDN library (libidn) is available, it is used for
+	  normalization of Jabber IDs. When unavailable, internal routines are
+	  used (as in previous versions).
 
 	Yahoo!/Yahoo! JAPAN:
 	* P2P file transfers.  (Sulabh Mahajan)
--- a/configure.ac	Fri Jul 17 02:19:31 2009 +0000
+++ b/configure.ac	Fri Jul 17 21:11:25 2009 +0000
@@ -808,6 +808,27 @@
 	fi
 fi
 
+AC_ARG_ENABLE(idn,
+	[AC_HELP_STRING([--disable-idn], [compile without IDN support])],
+	[enable_idn="$enableval" force_idn=$enableval], [enable_idn="yes" force_idn=no])
+if test "x$enable_idn" != "xno"; then
+	PKG_CHECK_MODULES(IDN, libidn >= 0.0.0, [
+		AC_DEFINE(USE_IDN, 1, [Use GNU Libidn for stringprep and IDN])
+		AC_SUBST(IDN_CFLAGS)
+		AC_SUBST(IDN_LIBS)
+	], [
+		AC_MSG_RESULT(no)
+		if test "x$force_deps" = "xyes" ; then
+			AC_MSG_ERROR([
+GNU Libidn development headers not found.
+Use --disable-idn if you do not need it.
+])
+		fi
+		dnl Not found and not forced so the summary below must say no
+		enable_idn="no"
+	])
+fi
+
 dnl #######################################################################
 dnl # Check for Meanwhile headers (for Sametime)
 dnl #######################################################################
@@ -2546,6 +2567,7 @@
 if test "x$enable_dbus" = "xyes" ; then
 	eval eval echo D-Bus services directory...... : $DBUS_SERVICES_DIR
 fi
+echo Build with GNU Libidn......... : $enable_idn
 echo Build with NetworkManager..... : $enable_nm
 echo SSL Library/Libraries......... : $msg_ssl
 if test "x$SSL_CERTIFICATES_DIR" != "x" ; then
--- a/libpurple/protocols/jabber/Makefile.am	Fri Jul 17 02:19:31 2009 +0000
+++ b/libpurple/protocols/jabber/Makefile.am	Fri Jul 17 21:11:25 2009 +0000
@@ -88,7 +88,7 @@
 st =
 pkg_LTLIBRARIES      = libjabber.la libxmpp.la
 libjabber_la_SOURCES = $(JABBERSOURCES)
-libjabber_la_LIBADD  = $(GLIB_LIBS) $(SASL_LIBS) $(LIBXML_LIBS)
+libjabber_la_LIBADD  = $(GLIB_LIBS) $(SASL_LIBS) $(LIBXML_LIBS) $(IDN_LIBS)
 
 libxmpp_la_SOURCES = libxmpp.c
 libxmpp_la_LIBADD = libjabber.la
@@ -100,4 +100,5 @@
 	-I$(top_builddir)/libpurple \
 	$(DEBUG_CFLAGS) \
 	$(GLIB_CFLAGS) \
+	$(IDN_CFLAGS) \
 	$(LIBXML_CFLAGS)
--- a/libpurple/protocols/jabber/jutil.c	Fri Jul 17 02:19:31 2009 +0000
+++ b/libpurple/protocols/jabber/jutil.c	Fri Jul 17 21:11:25 2009 +0000
@@ -31,9 +31,165 @@
 #include "presence.h"
 #include "jutil.h"
 
+#ifdef USE_IDN
+#include <idna.h>
+#include <stringprep.h>
+static char idn_buffer[1024];
+#endif
+
+gchar *jabber_try_idna_to_ascii(const char *input)
+{
+#ifndef USE_IDN
+	return g_strdup(input);
+#else
+	gchar *out;
+	char *tmp;
+	/* With libidn, NULL or empty input is rejected and NULL is returned. */
+	g_return_val_if_fail(input != NULL, NULL);
+	g_return_val_if_fail(*input != '\0', NULL);
+	/* ToASCII the whole UTF-8 name; any libidn failure is reported as NULL. */
+	if (idna_to_ascii_8z(input, &tmp, IDNA_USE_STD3_ASCII_RULES) != IDNA_SUCCESS) {
+		return NULL;
+	}
+	/* Hand back a GLib-allocated copy so the libidn buffer can be released here. */
+	out = g_strdup(tmp);
+	/* This *MUST* be freed with free, not g_free */
+	free(tmp);
+	return out;
+#endif
+}
+
+#ifdef USE_IDN
+/* Run libidn's XMPP nodeprep profile on str in place; TRUE iff it succeeded. */
+static gboolean jabber_nodeprep(char *str, size_t buflen) {
+	return STRINGPREP_OK == stringprep_xmpp_nodeprep(str, buflen);
+}
+
+/* Run libidn's XMPP resourceprep profile on str in place; TRUE iff it succeeded. */
+static gboolean jabber_resourceprep(char *str, size_t buflen) {
+	return STRINGPREP_OK == stringprep_xmpp_resourceprep(str, buflen);
+}
+
+/*
+ * Normalize an already-split JID using libidn. 'at' and 'slash' point at the
+ * separators inside str (NULL when absent); 'null' points at str's terminator.
+ * The node gets nodeprep, the domain nameprep plus a ToASCII check (or IPv6
+ * literal validation), the resource resourceprep. Returns a newly allocated
+ * JabberID, or NULL if a part exceeds 1023 bytes or fails its profile.
+ */
+static JabberID*
+jabber_idn_validate(const char *str, const char *at, const char *slash,
+                    const char *null)
+{
+	const char *node = NULL;
+	const char *domain = NULL;
+	const char *resource = NULL;
+	int node_len = 0;
+	int domain_len = 0;
+	int resource_len = 0;
+	char *out;
+	JabberID *jid;
+
+	/* Split into parts; each must fit in idn_buffer (<= 1023 bytes) */
+	if (at) {
+		node = str;
+		node_len = at - str;
+
+		domain = at + 1;
+		if (slash) {
+			domain_len = slash - (at + 1);
+			resource = slash + 1;
+			resource_len = null - (slash + 1);
+		} else {
+			domain_len = null - (at + 1);
+		}
+	} else {
+		domain = str;
+
+		if (slash) {
+			domain_len = slash - str;
+			resource = slash + 1;
+			resource_len = null - (slash + 1);
+		} else {
+			domain_len = null - str;
+		}
+	}
+
+	if (node && node_len > 1023)
+		return NULL;
+	if (domain_len > 1023)
+		return NULL;
+	if (resource && resource_len > 1023)
+		return NULL;
+
+	jid = g_new0(JabberID, 1);
+
+	if (node) {
+		strncpy(idn_buffer, node, node_len);
+		idn_buffer[node_len] = '\0';
+
+		if (!jabber_nodeprep(idn_buffer, sizeof(idn_buffer)))
+			goto fail;
+
+		jid->node = g_strdup(idn_buffer);
+	}
+
+	/* domain *must* be here */
+	strncpy(idn_buffer, domain, domain_len);
+	idn_buffer[domain_len] = '\0';
+	if (domain[0] == '[') { /* IPv6 address */
+		gboolean valid = FALSE;
+
+		if (idn_buffer[domain_len - 1] == ']') {
+			idn_buffer[domain_len - 1] = '\0';
+			valid = purple_ipv6_address_is_valid(idn_buffer + 1);
+		}
+
+		if (!valid)
+			goto fail;
+
+		/* Store the bracketed literal as given; idn_buffer lost its ']' above */
+		jid->domain = g_strndup(domain, domain_len);
+	} else {
+		/* Apply nameprep */
+		if (stringprep_nameprep(idn_buffer, sizeof(idn_buffer)) != STRINGPREP_OK)
+			goto fail;
+
+		/* And now ToASCII */
+		if (idna_to_ascii_8z(idn_buffer, &out, IDNA_USE_STD3_ASCII_RULES) != IDNA_SUCCESS)
+			goto fail;
+
+		/* Validity check only; 'out' *MUST* be freed using 'free', not 'g_free' */
+		free(out);
+		jid->domain = g_strdup(idn_buffer);
+	}
+
+	if (resource) {
+		strncpy(idn_buffer, resource, resource_len);
+		idn_buffer[resource_len] = '\0';
+
+		if (!jabber_resourceprep(idn_buffer, sizeof(idn_buffer)))
+			goto fail;
+
+		jid->resource = g_strdup(idn_buffer);
+	}
+
+	return jid;
+
+fail:
+	jabber_id_free(jid);
+	return NULL;
+}
+
+#endif /* USE_IDN */
+
 gboolean jabber_nodeprep_validate(const char *str)
 {
+#ifdef USE_IDN
+	gboolean result;
+#else
 	const char *c;
+#endif
 
 	if(!str)
 		return TRUE;
@@ -41,6 +197,12 @@
 	if(strlen(str) > 1023)
 		return FALSE;
 
+#ifdef USE_IDN
+	strncpy(idn_buffer, str, sizeof(idn_buffer) - 1);
+	idn_buffer[sizeof(idn_buffer) - 1] = '\0';
+	result = jabber_nodeprep(idn_buffer, sizeof(idn_buffer));
+	return result;
+#else /* USE_IDN */
 	c = str;
 	while(c && *c) {
 		gunichar ch = g_utf8_get_char(c);
@@ -52,6 +214,7 @@
 	}
 
 	return TRUE;
+#endif /* USE_IDN */
 }
 
 gboolean jabber_domain_validate(const char *str)
@@ -101,7 +264,11 @@
 
 gboolean jabber_resourceprep_validate(const char *str)
 {
+#ifdef USE_IDN
+	gboolean result;
+#else
 	const char *c;
+#endif
 
 	if(!str)
 		return TRUE;
@@ -109,6 +276,12 @@
 	if(strlen(str) > 1023)
 		return FALSE;
 
+#ifdef USE_IDN
+	strncpy(idn_buffer, str, sizeof(idn_buffer) - 1);
+	idn_buffer[sizeof(idn_buffer) - 1] = '\0';
+	result = jabber_resourceprep(idn_buffer, sizeof(idn_buffer));
+	return result;
+#else /* USE_IDN */
 	c = str;
 	while(c && *c) {
 		gunichar ch = g_utf8_get_char(c);
@@ -119,9 +292,9 @@
 	}
 
 	return TRUE;
+#endif /* USE_IDN */
 }
 
-
 JabberID*
 jabber_id_new(const char *str)
 {
@@ -132,8 +305,10 @@
 #if 0
 	gboolean node_is_required = FALSE;
 #endif
+#ifndef USE_IDN
 	char *node = NULL;
 	char *domain;
+#endif
 	JabberID *jid;
 
 	if (!str)
@@ -253,23 +428,27 @@
 	if (!g_utf8_validate(str, -1, NULL))
 		return NULL;
 
+#ifdef USE_IDN
+	return jabber_idn_validate(str, at, slash, c /* points to the null */);
+#else /* USE_IDN */
+
 	jid = g_new0(JabberID, 1);
 
 	/* normalization */
 	if(at) {
-		node = g_utf8_strdown(str, at-str);
+		node = g_utf8_casefold(str, at-str);
 		if(slash) {
-			domain = g_utf8_strdown(at+1, slash-(at+1));
+			domain = g_utf8_casefold(at+1, slash-(at+1));
 			jid->resource = g_utf8_normalize(slash+1, -1, G_NORMALIZE_NFKC);
 		} else {
-			domain = g_utf8_strdown(at+1, -1);
+			domain = g_utf8_casefold(at+1, -1);
 		}
 	} else {
 		if(slash) {
-			domain = g_utf8_strdown(str, slash-str);
+			domain = g_utf8_casefold(str, slash-str);
 			jid->resource = g_utf8_normalize(slash+1, -1, G_NORMALIZE_NFKC);
 		} else {
-			domain = g_utf8_strdown(str, -1);
+			domain = g_utf8_casefold(str, -1);
 		}
 	}
 
@@ -292,6 +471,7 @@
 	}
 
 	return jid;
+#endif /* USE_IDN */
 }
 
 void
--- a/libpurple/protocols/jabber/jutil.h	Fri Jul 17 02:19:31 2009 +0000
+++ b/libpurple/protocols/jabber/jutil.h	Fri Jul 17 21:11:25 2009 +0000
@@ -44,6 +44,11 @@
 /* Returns true if JID is the bare JID of our account. */
 gboolean jabber_is_own_account(JabberStream *js, const char *jid);
 
+/* Convert an IDNA domain name to the ASCII form usable for a DNS lookup; returns
+ * a newly allocated string (release it with g_free) or NULL on failure. If IDN
+ * support is not available, returns a plain copy of the input string. */
+gchar *jabber_try_idna_to_ascii(const gchar *input);
+
 gboolean jabber_nodeprep_validate(const char *);
 gboolean jabber_domain_validate(const char *);
 gboolean jabber_resourceprep_validate(const char *);
--- a/libpurple/tests/test_jabber_jutil.c	Fri Jul 17 02:19:31 2009 +0000
+++ b/libpurple/tests/test_jabber_jutil.c	Fri Jul 17 21:11:25 2009 +0000
@@ -44,6 +44,10 @@
 	longnode = g_strnfill(1023, 'a');
 	fail_unless(jabber_nodeprep_validate(longnode));
 	g_free(longnode);
+
+	longnode = g_strnfill(1024, 'a');
+	fail_if(jabber_nodeprep_validate(longnode));
+	g_free(longnode);
 }
 END_TEST
 
@@ -132,7 +136,19 @@
 	/* Ensure that jabber_id_new is properly lowercasing node and domains */
 	assert_jid_parts("paul", "darkrain42.org", "PaUL@darkrain42.org");
 	assert_jid_parts("paul", "darkrain42.org", "paul@DaRkRaIn42.org");
-	assert_jid_parts("ꙥ", "darkrain42.org", "Ꙥ@darkrain42.org");
+
+	/* These case-mapping tests culled from examining RFC3454 B.2 */
+
+	/* Cyrillic capital EF (U+0424) maps to lowercase EF (U+0444) */
+	assert_jid_parts("ф", "darkrain42.org", "Ф@darkrain42.org");
+	/*
+	 * These characters (U+A664 and U+A665) are not mapped to anything in
+	 * RFC3454 B.2. This first test *fails* when not using IDN because glib's
+	 * case-folding/utf8_strdown improperly lowercases the character.
+	 */
+	assert_jid_parts("Ꙥ", "darkrain42.org", "Ꙥ@darkrain42.org");
+	assert_jid_parts("ꙥ", "darkrain42.org", "ꙥ@darkrain42.org");
+	/* U+04E9 to U+04E9 */
 	assert_jid_parts("paul", "өarkrain42.org", "paul@Өarkrain42.org");
 }
 END_TEST