diff libpurple/util.c @ 30267:5bac51b394e6

util: Better validation of the allowed character values in XML 1.0, from http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char. Refs #11257. This doesn't actually make a difference, because I think none of the invalid ranges are valid UTF-8, and so g_utf8_validate catches them.
author Paul Aurich <paul@darkrain42.org>
date Thu, 29 Apr 2010 17:17:00 +0000
parents 37be7bc87ab2
children 77aba27f64da b06f69ada7a5
line wrap: on
line diff
--- a/libpurple/util.c	Thu Apr 29 05:58:02 2010 +0000
+++ b/libpurple/util.c	Thu Apr 29 17:17:00 2010 +0000
@@ -4593,12 +4593,22 @@
 	}
 
 	workstr = iter = g_new(gchar, strlen(str) + 1);
-	for ( ; *str; ++str) {
-		guchar c = *str;
-		if (c >= 0x20 || c == '\t' || c == '\n' || c == '\r') {
-			*iter = c;
-			++iter;
+	while (*str) {
+		gunichar ch = g_utf8_get_char(str);
+		gchar *next = g_utf8_next_char(str);
+		/*
+		 * Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
+		 *          [#x10000-#x10FFFF]
+		 */
+		if ((ch == '\t' || ch == '\n' || ch == '\r') ||
+				(ch >= 0x20 && ch <= 0xD7FF) ||
+				(ch >= 0xE000 && ch <= 0xFFFD) ||
+				(ch >= 0x10000 && ch <= 0x10FFFF)) {
+			memcpy(iter, str, next - str);
+			iter += (next - str);
 		}
+
+		str = next;
 	}
 
 	/* nul-terminate the new string */