# HG changeset patch # User Tim Ringenbach # Date 1086548782 0 # Node ID 316b1afb5974aae9c6c77bb12d0c3a52c80325b1 # Parent 8054855f2bb9b93363adcac75630e481323a66fa [gaim-migrate @ 10018] nosnilmot make autolinkification work even better. This patch improves the way we auto-linkify detected URLs and email addresses: - if a URL is enclosed in parentheses, e.g. (http://www.google.com/), the closing parenthesis will not be included as part of the link - email addresses inside parentheses or <>'s will correctly be linked - either whitespace or non-ASCII characters are recognized as boundaries for email addresses, which will allow auto linking of email addresses for Chinese conversations which don't use whitespace: wing: you know you mentioned being able to auto-linkify email addresses in Chinese text (without whitespace), do you want to try a patch out for me? stu|laptop: eh, actually i found that gaim cannot cannot? at all? stu|laptop: it seems so <stu|laptop> even with this? stu|laptop: i'll check it out then :) ... ... 
stu|laptop: it works perfectly with the patch :) committer: Tailor Script diff -r 8054855f2bb9 -r 316b1afb5974 src/util.c --- a/src/util.c Sun Jun 06 18:45:06 2004 +0000 +++ b/src/util.c Sun Jun 06 19:06:22 2004 +0000 @@ -1356,12 +1356,21 @@ const char *c, *t, *q = NULL; char *tmp, *tmpurlbuf; char url_buf[BUF_LEN * 4]; + gunichar g; gboolean inside_html = FALSE; + int inside_paren = 0; GString *ret = g_string_new(""); /* Assumes you have a buffer able to cary at least BUF_LEN * 2 bytes */ c = text; while (*c) { + + if(*c == '(' && !inside_html) { + inside_paren++; + ret = g_string_append_c(ret, *c); + c++; + } + if(inside_html) { if(*c == '>') { inside_html = FALSE; @@ -1398,6 +1407,9 @@ if (*(t - 1) == '.') t--; + if ((*(t - 1) == ')' && (inside_paren > 0))) { + t--; + } strncpy(url_buf, c, t - c); url_buf[t - c] = 0; tmpurlbuf = gaim_unescape_html(url_buf); @@ -1428,6 +1440,9 @@ if (*(t - 1) == '.') t--; + if ((*(t - 1) == ')' && (inside_paren > 0))) { + t--; + } strncpy(url_buf, c, t - c); url_buf[t - c] = 0; tmpurlbuf = gaim_unescape_html(url_buf); @@ -1449,6 +1464,9 @@ if (badchar(*t)) { if (*(t - 1) == '.') t--; + if ((*(t - 1) == ')' && (inside_paren > 0))) { + t--; + } strncpy(url_buf, c, t - c); url_buf[t - c] = 0; tmpurlbuf = gaim_unescape_html(url_buf); @@ -1473,6 +1491,9 @@ } if (*(t - 1) == '.') t--; + if ((*(t - 1) == ')' && (inside_paren > 0))) { + t--; + } strncpy(url_buf, c, t - c); url_buf[t - c] = 0; tmpurlbuf = gaim_unescape_html(url_buf); @@ -1509,9 +1530,8 @@ } } else if (c != text && (*c == '@')) { - char *tmp; int flag; - int len = 0; + GString *gurl_buf; const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0"; url_buf[0] = 0; @@ -1521,20 +1541,19 @@ flag = 1; t = c; + gurl_buf = g_string_new(""); while (flag) { - if (badchar(*t)) { - ret = g_string_truncate(ret, ret->len - (len - 1)); + /* iterate backwards grabbing the local part of an email address */ + g = g_utf8_get_char(t); + if (badchar(*t) || (g >= 127) || (*t == '(') || + 
((*t == ';') && (t > (text+2)) && !g_ascii_strncasecmp(t - 3, "<", 4))) { + /* local part will already be part of ret, strip it out */ + ret = g_string_truncate(ret, ret->len - (c - t)); + ret = g_string_append_unichar(ret, g); break; } else { - len++; - tmp = g_malloc(len + 1); - tmp[len] = 0; - tmp[0] = *t; - strncpy(tmp + 1, url_buf, len - 1); - strcpy(url_buf, tmp); - url_buf[len] = 0; - g_free(tmp); - t--; + g_string_prepend_unichar(gurl_buf, g); + t = g_utf8_find_prev_char(text, t); if (t < text) { ret = g_string_assign(ret, ""); break; @@ -1542,12 +1561,18 @@ } } - t = c + 1; + t = g_utf8_find_next_char(c, NULL); while (flag) { - if (badchar(*t)) { + /* iterate forwards grabbing the domain part of an email address */ + g = g_utf8_get_char(t); + if (badchar(*t) || (g >= 127) || (*t == ')') || + ((*t == '&') && !g_ascii_strncasecmp(t, ">", 4))) { char *d; + strcpy(url_buf, gurl_buf->str); + + /* strip off trailing periods */ for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--) *d = '\0'; @@ -1563,13 +1588,17 @@ break; } else { - strncat(url_buf, t, 1); - len++; - url_buf[len] = 0; + g_string_append_unichar(gurl_buf, g); + t = g_utf8_find_next_char(t, NULL); } - - t++; } + g_string_free(gurl_buf, TRUE); + } + + if(*c == ')' && !inside_html) { + inside_paren--; + ret = g_string_append_c(ret, *c); + c++; } if (*c == 0)