diff libpurple/protocols/jabber/jutil.c @ 27530:1b580473e753

Check in a version of jabber_id_new() that is hopefully more efficient. I think it's less efficient than the version I originally sent to the devel list and accidentally checked in a day or three ago, but it's also correct and passes all our unit tests. I think it can be optimized a little further by filling in characters in the default case of the switch statement (see "implement_me") and removing the #if 0 lines. It's OK if the "implement_me" checks don't list every valid character -- the worst that happens is that the check falls through to our nodeprep and resourceprep.
author Mark Doliner <mark@kingant.net>
date Fri, 10 Jul 2009 06:37:13 +0000
parents 1c61906755fe
children 04d8452dee48
line wrap: on
line diff
--- a/libpurple/protocols/jabber/jutil.c	Fri Jul 10 06:07:27 2009 +0000
+++ b/libpurple/protocols/jabber/jutil.c	Fri Jul 10 06:37:13 2009 +0000
@@ -103,20 +103,139 @@
 JabberID*
 jabber_id_new(const char *str)
 {
-	char *at;
-	char *slash;
+	const char *at = NULL;
+	const char *slash = NULL;
+	const char *cur;
+	gunichar c;
+	gboolean needs_validation = FALSE;
+#if 0
+	gboolean node_is_required = FALSE;
+#endif
 	char *node = NULL;
 	char *domain;
 	JabberID *jid;
 
-	if(!str || !g_utf8_validate(str, -1, NULL))
+	if (!str)
+		return NULL;
+
+	for (cur = str; *cur != '\0'; cur = g_utf8_next_char(cur))
+	{
+		c = g_utf8_get_char(cur);
+		switch (c) {
+			case '@':
+				if (!slash) {
+					if (at) {
+						/* Multiple @'s in the node/domain portion, not a valid JID! */
+						return NULL;
+					}
+					if (cur == str) {
+						/* JIDs cannot start with @ */
+						return NULL;
+					}
+					if ((g_utf8_next_char(cur))[0] == '\0') {
+						/* JIDs cannot end with @ */
+						return NULL;
+					}
+					at = cur;
+				}
+				break;
+
+			case '/':
+				if (!slash) {
+					if (cur == str) {
+						/* JIDs cannot start with / */
+						return NULL;
+					}
+					if ((g_utf8_next_char(cur))[0] == '\0') {
+						/* JIDs cannot end with / */
+						return NULL;
+					}
+					slash = cur;
+				}
+				break;
+
+			default:
+				/* characters allowed everywhere */
+				if ((c > 'a' && c < 'z')
+						|| (c > '0' && c < '9')
+						|| (c > 'A' && c < 'Z')
+						|| c == '.' || c == '-')
+					/* We're good */
+					break;
+
+#if 0
+				if (slash != NULL) {
+					/* characters allowed only in the resource */
+					if (implement_me)
+						/* We're good */
+						break;
+				}
+
+				/* characters allowed only in the node */
+				if (implement_me) {
+					/*
+					 * Ok, this character is valid, but only if it's a part
+					 * of the node and not the domain.  But we don't know
+					 * if "c" is a part of the node or the domain until after
+					 * we've found the @.  So set a flag for now and check
+					 * that we found an @ later.
+					 */
+					node_is_required = TRUE;
+					break;
+				}
+#endif
+
+				/*
+				 * Hmm, this character is a bit more exotic.  Better fall
+				 * back to using the more expensive UTF-8 compliant
+				 * stringprep functions.
+				 */
+				needs_validation = TRUE;
+				break;
+		}
+	}
+
+#if 0
+	if (node_is_required && at == NULL)
+		/* Found invalid characters in the domain */
+		return NULL;
+#endif
+
+	if (!needs_validation) {
+		/* JID is made of only ASCII characters--just lowercase and return */
+		jid = g_new0(JabberID, 1);
+
+		if (at) {
+			jid->node = g_ascii_strdown(str, at - str);
+			if (slash) {
+				jid->domain = g_ascii_strdown(at + 1, slash - (at + 1));
+				jid->resource = g_strdup(slash + 1);
+			} else {
+				jid->domain = g_ascii_strdown(at + 1, -1);
+			}
+		} else {
+			if (slash) {
+				jid->domain = g_ascii_strdown(str, slash - str);
+				jid->resource = g_strdup(slash + 1);
+			} else {
+				jid->domain = g_ascii_strdown(str, -1);
+			}
+		}
+		return jid;
+	}
+
+	/*
+	 * If we get here, there are some non-ASCII chars in the string, so
+	 * we'll need to validate it, normalize, and finally do a full jabber
+	 * nodeprep on the jid.
+	 */
+
+	if (!g_utf8_validate(str, -1, NULL))
 		return NULL;
 
 	jid = g_new0(JabberID, 1);
 
-	at = g_utf8_strchr(str, -1, '@');
-	slash = g_utf8_strchr(str, -1, '/');
-
+	/* normalization */
 	if(at) {
 		node = g_utf8_normalize(str, at-str, G_NORMALIZE_NFKC);
 		if(slash) {
@@ -144,6 +263,7 @@
 		g_free(domain);
 	}
 
+	/* and finally the jabber nodeprep */
 	if(!jabber_nodeprep_validate(jid->node) ||
 			!jabber_nameprep_validate(jid->domain) ||
 			!jabber_resourceprep_validate(jid->resource)) {