changeset 9222:316b1afb5974

[gaim-migrate @ 10018] nosnilmot make autolinkification work even better. This patch improves the way we auto-linkify detected URLs and email addresses: - if a URL is enclosed in parenthesis eg (http://www.google.com/), the closing parenthesis will not be included as part of the link - email addresses inside parenthesis or <>'s will correctly be linked - either whitespace or non-ascii characters are recognized as boundaries for email addresses, which will allow auto linking of email addresses for Chinese conversations which don't use whitespace: <stu|laptop> wing: you know you mentioned being able to auto-linkify email addresses in Chinese text (without whitespace), do you want to try a patch out for me? <wing> stu|laptop: eh, actually i found that gaim cannot <stu|laptop> cannot? at all? <wing> stu|laptop: it seems so stu|laptop> even with this? <link to patch> <wing> stu|laptop: i'll check it out then :) ... ... <wing> stu|laptop: it works perfectly with the patch :) committer: Tailor Script <tailor@pidgin.im>
author Tim Ringenbach <marv@pidgin.im>
date Sun, 06 Jun 2004 19:06:22 +0000
parents 8054855f2bb9
children f46154d6de2b
files src/util.c
diffstat 1 files changed, 49 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/src/util.c	Sun Jun 06 18:45:06 2004 +0000
+++ b/src/util.c	Sun Jun 06 19:06:22 2004 +0000
@@ -1356,12 +1356,21 @@
 	const char *c, *t, *q = NULL;
 	char *tmp, *tmpurlbuf;
 	char url_buf[BUF_LEN * 4];
+	gunichar g;
 	gboolean inside_html = FALSE;
+	int inside_paren = 0;
 	GString *ret = g_string_new("");
 	/* Assumes you have a buffer able to cary at least BUF_LEN * 2 bytes */
 
 	c = text;
 	while (*c) {
+
+		if(*c == '(' && !inside_html) {
+			inside_paren++;
+			ret = g_string_append_c(ret, *c);
+			c++;
+		}
+
 		if(inside_html) {
 			if(*c == '>') {
 				inside_html = FALSE;
@@ -1398,6 +1407,9 @@
 
 					if (*(t - 1) == '.')
 						t--;
+					if ((*(t - 1) == ')' && (inside_paren > 0))) {
+						t--;
+					}
 					strncpy(url_buf, c, t - c);
 					url_buf[t - c] = 0;
 					tmpurlbuf = gaim_unescape_html(url_buf);
@@ -1428,6 +1440,9 @@
 
 						if (*(t - 1) == '.')
 							t--;
+						if ((*(t - 1) == ')' && (inside_paren > 0))) {
+							t--;
+						}
 						strncpy(url_buf, c, t - c);
 						url_buf[t - c] = 0;
 						tmpurlbuf = gaim_unescape_html(url_buf);
@@ -1449,6 +1464,9 @@
 				if (badchar(*t)) {
 					if (*(t - 1) == '.')
 						t--;
+					if ((*(t - 1) == ')' && (inside_paren > 0))) {
+						t--;
+					}
 					strncpy(url_buf, c, t - c);
 					url_buf[t - c] = 0;
 					tmpurlbuf = gaim_unescape_html(url_buf);
@@ -1473,6 +1491,9 @@
 						}
 						if (*(t - 1) == '.')
 							t--;
+						if ((*(t - 1) == ')' && (inside_paren > 0))) {
+							t--;
+						}
 						strncpy(url_buf, c, t - c);
 						url_buf[t - c] = 0;
 						tmpurlbuf = gaim_unescape_html(url_buf);
@@ -1509,9 +1530,8 @@
 
 			}
 		} else if (c != text && (*c == '@')) {
-			char *tmp;
 			int flag;
-			int len = 0;
+			GString *gurl_buf;
 			const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";
 			url_buf[0] = 0;
 
@@ -1521,20 +1541,19 @@
 				flag = 1;
 
 			t = c;
+			gurl_buf = g_string_new("");
 			while (flag) {
-				if (badchar(*t)) {
-					ret = g_string_truncate(ret, ret->len - (len - 1));
+				/* iterate backwards grabbing the local part of an email address */
+				g = g_utf8_get_char(t);
+				if (badchar(*t) || (g >= 127) || (*t == '(') ||
+						((*t == ';') && (t > (text+2)) && !g_ascii_strncasecmp(t - 3, "&lt;", 4))) {
+					/* local part will already be part of ret, strip it out */
+					ret = g_string_truncate(ret, ret->len - (c - t));
+					ret = g_string_append_unichar(ret, g);
 					break;
 				} else {
-					len++;
-					tmp = g_malloc(len + 1);
-					tmp[len] = 0;
-					tmp[0] = *t;
-					strncpy(tmp + 1, url_buf, len - 1);
-					strcpy(url_buf, tmp);
-					url_buf[len] = 0;
-					g_free(tmp);
-					t--;
+					g_string_prepend_unichar(gurl_buf, g);
+					t = g_utf8_find_prev_char(text, t);
 					if (t < text) {
 						ret = g_string_assign(ret, "");
 						break;
@@ -1542,12 +1561,18 @@
 				}
 			}
 
-			t = c + 1;
+			t = g_utf8_find_next_char(c, NULL);
 
 			while (flag) {
-				if (badchar(*t)) {
+				/* iterate forwards grabbing the domain part of an email address */
+				g = g_utf8_get_char(t);
+				if (badchar(*t) || (g >= 127) || (*t == ')') ||
+						((*t == '&') && !g_ascii_strncasecmp(t, "&gt;", 4))) {
 					char *d;
 
+					strcpy(url_buf, gurl_buf->str);
+
+					/* strip off trailing periods */
 					for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
 						*d = '\0';
 
@@ -1563,13 +1588,17 @@
 
 					break;
 				} else {
-					strncat(url_buf, t, 1);
-					len++;
-					url_buf[len] = 0;
+					g_string_append_unichar(gurl_buf, g);
+					t = g_utf8_find_next_char(t, NULL);
 				}
-
-				t++;
 			}
+			g_string_free(gurl_buf, TRUE);
+		}
+
+		if(*c == ')' && !inside_html) {
+			inside_paren--;
+			ret = g_string_append_c(ret, *c);
+			c++;
 		}
 
 		if (*c == 0)