# HG changeset patch # User Mark Doliner # Date 1247207833 0 # Node ID 1b580473e7533a9aa4f31c7a084581fe3d1ee9da # Parent c6fea4ae68b39992d8a0070cf7589dc3f3f043d2 Check in a version of jabber_id_new() that is hopefully more efficient. I think it's less efficient than the version I originally sent to the devel list and accidentally checked in a day or three ago. But it's also correct, and passes all our unit tests. I think it can be optimized a little further by filling in characters in the default case of the switch statement (see "implement_me") and remove the #if 0 lines. It's ok if the "implement_me" checks don't list every valid character-- the worst that happens is the check falls through to our nodeprep and resource prep. diff -r c6fea4ae68b3 -r 1b580473e753 libpurple/protocols/jabber/jutil.c --- a/libpurple/protocols/jabber/jutil.c Fri Jul 10 06:07:27 2009 +0000 +++ b/libpurple/protocols/jabber/jutil.c Fri Jul 10 06:37:13 2009 +0000 @@ -103,20 +103,139 @@ JabberID* jabber_id_new(const char *str) { - char *at; - char *slash; + const char *at = NULL; + const char *slash = NULL; + const char *cur; + gunichar c; + gboolean needs_validation = FALSE; +#if 0 + gboolean node_is_required = FALSE; +#endif char *node = NULL; char *domain; JabberID *jid; - if(!str || !g_utf8_validate(str, -1, NULL)) + if (!str) + return NULL; + + for (cur = str; *cur != '\0'; cur = g_utf8_next_char(cur)) + { + c = g_utf8_get_char(cur); + switch (c) { + case '@': + if (!slash) { + if (at) { + /* Multiple @'s in the node/domain portion, not a valid JID! */ + return NULL; + } + if (cur == str) { + /* JIDs cannot start with @ */ + return NULL; + } + if ((g_utf8_next_char(cur))[0] == '\0') { + /* JIDs cannot end with @ */ + return NULL; + } + at = cur; + } + break; + + case '/': + if (!slash) { + if (cur == str) { + /* JIDs cannot start with / */ + return NULL; + } + if ((g_utf8_next_char(cur))[0] == '\0') { + /* JIDs cannot end with / */ + return NULL; + } + slash = cur; + } + break; + + default: + /* characters allowed everywhere */ + if ((c > 'a' && c < 'z') + || (c > '0' && c < '9') + || (c > 'A' && c < 'Z') + || c == '.' || c == '-') + /* We're good */ + break; + +#if 0 + if (slash != NULL) { + /* characters allowed only in the resource */ + if (implement_me) + /* We're good */ + break; + } + + /* characters allowed only in the node */ + if (implement_me) { + /* + * Ok, this character is valid, but only if it's a part + * of the node and not the domain. But we don't know + * if "c" is a part of the node or the domain until after + * we've found the @. So set a flag for now and check + * that we found an @ later. + */ + node_is_required = TRUE; + break; + } +#endif + + /* + * Hmm, this character is a bit more exotic. Better fall + * back to using the more expensive UTF-8 compliant + * stringprep functions. + */ + needs_validation = TRUE; + break; + } + } + +#if 0 + if (node_is_required && at == NULL) + /* Found invalid characters in the domain */ + return NULL; +#endif + + if (!needs_validation) { + /* JID is made of only ASCII characters--just lowercase and return */ + jid = g_new0(JabberID, 1); + + if (at) { + jid->node = g_ascii_strdown(str, at - str); + if (slash) { + jid->domain = g_ascii_strdown(at + 1, slash - (at + 1)); + jid->resource = g_strdup(slash + 1); + } else { + jid->domain = g_ascii_strdown(at + 1, -1); + } + } else { + if (slash) { + jid->domain = g_ascii_strdown(str, slash - str); + jid->resource = g_strdup(slash + 1); + } else { + jid->domain = g_ascii_strdown(str, -1); + } + } + return jid; + } + + /* + * If we get here, there are some non-ASCII chars in the string, so + * we'll need to validate it, normalize, and finally do a full jabber + * nodeprep on the jid. + */ + + if (!g_utf8_validate(str, -1, NULL)) return NULL; jid = g_new0(JabberID, 1); - at = g_utf8_strchr(str, -1, '@'); - slash = g_utf8_strchr(str, -1, '/'); - + /* normalization */ if(at) { node = g_utf8_normalize(str, at-str, G_NORMALIZE_NFKC); if(slash) { @@ -144,6 +263,7 @@ g_free(domain); } + /* and finally the jabber nodeprep */ if(!jabber_nodeprep_validate(jid->node) || !jabber_nameprep_validate(jid->domain) || !jabber_resourceprep_validate(jid->resource)) {