changeset 24806:e7b27ee5e7b6

Make purple_utf8_has_word() better about detecting word boundaries (still not really "correct") and prevent stuff like "&" spuriously matching a nick of "amp". Fixes #7328.
author Daniel Atallah <daniel.atallah@gmail.com>
date Fri, 19 Dec 2008 04:11:15 +0000
parents 7608cf033a88
children c7d11c2673cc
files libpurple/util.c
diffstat 1 files changed, 20 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/libpurple/util.c	Fri Dec 19 03:11:59 2008 +0000
+++ b/libpurple/util.c	Fri Dec 19 04:11:15 2008 +0000
@@ -4572,18 +4572,35 @@
 purple_utf8_has_word(const char *haystack, const char *needle)
 {
 	char *hay, *pin, *p;
+	const char *start, *prev_char;
+	gunichar before, after;
 	int n;
 	gboolean ret = FALSE;
 
-	hay = g_utf8_strdown(haystack, -1);
+	start = hay = g_utf8_strdown(haystack, -1);
 
 	pin = g_utf8_strdown(needle, -1);
 	n = strlen(pin);
 
-	if ((p = strstr(hay, pin)) != NULL) {
-		if ((p == hay || !isalnum(*(p - 1))) && !isalnum(*(p + n))) {
+	while ((p = strstr(start, pin)) != NULL) {
+		prev_char = g_utf8_find_prev_char(hay, p);
+		before = -2;
+		if (prev_char) {
+			before = g_utf8_get_char(prev_char);
+		}
+		after = g_utf8_get_char_validated(p + n, - 1);
+
+		if ((p == hay ||
+				/* The character before is a reasonable guess for a word boundary
+				   ("!g_unichar_isalnum()" is not a valid way to determine word
+				    boundaries, but it is the only reasonable thing to do here),
+				   and isn't the '&' from a "&amp;" or some such entity*/
+				(before != -2 && !g_unichar_isalnum(before) && *(p - 1) != '&'))
+				&& after != -2 && !g_unichar_isalnum(after)) {
 			ret = TRUE;
+			break;
 		}
+		start = p + 1;
 	}
 
 	g_free(pin);