diff libpurple/util.c @ 27944:cfa78428d8b2

propagate from branch 'im.pidgin.pidgin' (head 8f2b3a7063a7a0e5cae9de55e55f7066c0d202d6) to branch 'im.pidgin.pidgin.yaz' (head f0674cac79a9d60c5a3d998b37fc2cd75ce0a6b2)
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Sat, 02 May 2009 20:35:36 +0000
parents ab31daf1c1ee 1688f7e15530
children c2ac87c5a035
line wrap: on
line diff
--- a/libpurple/util.c	Sat May 02 19:14:07 2009 +0000
+++ b/libpurple/util.c	Sat May 02 20:35:36 2009 +0000
@@ -1223,6 +1223,8 @@
 		return FALSE;
 
 	q = strstr(p, end_token);
+	if(q == NULL) //yaz
+		return FALSE;
 
 	/* Trim leading blanks */
 	while (*p != '\n' && g_ascii_isspace(*p)) {
@@ -1235,7 +1237,7 @@
 	}
 
 	/* Don't bother with null strings */
-	if (p == q)
+	if (p >= q)
 		return FALSE;
 
 	if (q != NULL && (!no_value_token ||
@@ -2006,6 +2008,20 @@
 }
 
 static gboolean
+is_zenkaku_space(const char *c)
+{
+    /* TRUE iff c starts with UTF-8 bytes e3 80 80 (U+3000, full-width space). */
+    const guchar *u = (const guchar *)c;
+
+    if(u == NULL)
+        return FALSE;
+
+    /* && stops at the first mismatch, so a NUL terminator ends the test:
+     * no byte past the end is read and no strlen() scan is needed per call. */
+    return (u[0] == 0xe3 && u[1] == 0x80 && u[2] == 0x80) ? TRUE : FALSE;
+}
+
+static gboolean
 badchar(char c)
 {
 	switch (c) {
@@ -2043,6 +2059,7 @@
 	gunichar g;
 	gboolean inside_html = FALSE;
 	int inside_paren = 0;
+	int inside_bracket = 0;
 	GString *ret;
 
 	if (text == NULL)
@@ -2059,6 +2076,12 @@
 			c++;
 		}
 
+		if(*c == '[' && !inside_html) {
+			inside_bracket++;
+			ret = g_string_append_c(ret, *c);
+			c++;
+		}
+
 		if(inside_html) {
 			if(*c == '>') {
 				inside_html = FALSE;
@@ -2086,7 +2109,7 @@
 					(!g_ascii_strncasecmp(c, "https://", 8)))) {
 			t = c;
 			while (1) {
-				if (badchar(*t) || badentity(t)) {
+				if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
 
 					if ((!g_ascii_strncasecmp(c, "http://", 7) && (t - c == 7)) ||
 						(!g_ascii_strncasecmp(c, "https://", 8) && (t - c == 8))) {
@@ -2103,6 +2126,9 @@
 					if ((*(t - 1) == ')' && (inside_paren > 0))) {
 						t--;
 					}
+					if ((*(t - 1) == ']' && (inside_bracket > 0))) {
+						t--;
+					}
 
 					url_buf = g_strndup(c, t - c);
 					tmpurlbuf = purple_unescape_html(url_buf);
@@ -2120,7 +2146,7 @@
 			if (c[4] != '.') {
 				t = c;
 				while (1) {
-					if (badchar(*t) || badentity(t)) {
+					if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
 						if (t - c == 4) {
 							break;
 						}
@@ -2135,6 +2161,9 @@
 						if ((*(t - 1) == ')' && (inside_paren > 0))) {
 							t--;
 						}
+						if ((*(t - 1) == ']' && (inside_bracket > 0))) {
+							t--;
+						}
 						url_buf = g_strndup(c, t - c);
 						tmpurlbuf = purple_unescape_html(url_buf);
 						g_string_append_printf(ret,
@@ -2151,7 +2180,7 @@
 		} else if (!g_ascii_strncasecmp(c, "ftp://", 6) || !g_ascii_strncasecmp(c, "sftp://", 7)) {
 			t = c;
 			while (1) {
-				if (badchar(*t) || badentity(t)) {
+				if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
 
 					if ((!g_ascii_strncasecmp(c, "ftp://", 6) && (t - c == 6)) ||
 						(!g_ascii_strncasecmp(c, "sftp://", 7) && (t - c == 7))) {
@@ -2163,6 +2192,9 @@
 					if ((*(t - 1) == ')' && (inside_paren > 0))) {
 						t--;
 					}
+					if ((*(t - 1) == ']' && (inside_bracket > 0))) {
+						t--;
+					}
 					url_buf = g_strndup(c, t - c);
 					tmpurlbuf = purple_unescape_html(url_buf);
 					g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
@@ -2181,7 +2213,7 @@
 			if (c[4] != '.') {
 				t = c;
 				while (1) {
-					if (badchar(*t) || badentity(t)) {
+					if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
 						if (t - c == 4) {
 							break;
 						}
@@ -2190,6 +2222,9 @@
 						if ((*(t - 1) == ')' && (inside_paren > 0))) {
 							t--;
 						}
+						if ((*(t - 1) == ']' && (inside_bracket > 0))) {
+							t--;
+						}
 						url_buf = g_strndup(c, t - c);
 						tmpurlbuf = purple_unescape_html(url_buf);
 						g_string_append_printf(ret,
@@ -2208,7 +2243,7 @@
 		} else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
 			t = c;
 			while (1) {
-				if (badchar(*t) || badentity(t)) {
+				if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
 					char *d;
 					if (t - c == 7) {
 						break;
@@ -2242,7 +2277,7 @@
 				   (c == text || badchar(c[-1]) || badentity(c-1))) {
 			t = c;
 			while (1) {
-				if (badchar(*t) || badentity(t)) {
+				if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
 
 					if (t - c == 5) {
 						break;
@@ -2258,6 +2293,9 @@
 					if ((*(t - 1) == ')' && (inside_paren > 0))) {
 						t--;
 					}
+					if ((*(t - 1) == ']' && (inside_bracket > 0))) {
+						t--;
+					}
 
 					url_buf = g_strndup(c, t - c);
 					tmpurlbuf = purple_unescape_html(url_buf);
@@ -2345,6 +2383,11 @@
 			ret = g_string_append_c(ret, *c);
 			c++;
 		}
+		if(*c == ']' && !inside_html) {
+			inside_bracket--;
+			ret = g_string_append_c(ret, *c);
+			c++;
+		}
 
 		if (*c == 0)
 			break;
@@ -4849,3 +4892,296 @@
 	}
 #endif
 }
+
+/*
+ * botch_ucs - rewrite selected 2-byte units of a buffer in place.
+ *
+ * The buffer is walked two bytes at a time and a unit is replaced when its
+ * (first, second) byte pair matches a table entry.  Read as big-endian
+ * UCS-2 (NOTE(review): byte order is assumed, confirm with callers) the
+ * replacements are U+00A2->U+FFE0, U+00A3->U+FFE1, U+00AC->U+FFE2,
+ * U+2016->U+2225, U+2212->U+FF0D and U+301C->U+FF5E.  Every replacement is
+ * also 2 bytes, so the buffer length never changes.
+ *
+ * ucs_src: buffer to modify; must not be NULL.
+ * len:     size of the buffer in bytes; must be greater than zero.  If it
+ *          is odd, the final byte is left untouched.
+ *
+ * When _WIN32 is defined the function does nothing.
+ */
+#ifdef _WIN32
+void botch_ucs(gchar *ucs_src, gsize len)
+{
+	/* no operation */
+}
+#else
+void botch_ucs(gchar *ucs_src, gsize len)
+{
+	/* { first byte, second byte, new first byte, new second byte } */
+	static const guchar map[][4] = {
+		{ 0x00, 0xa2, 0xff, 0xe0 },	/* U+00A2 -> U+FFE0 */
+		{ 0x00, 0xa3, 0xff, 0xe1 },	/* U+00A3 -> U+FFE1 */
+		{ 0x00, 0xac, 0xff, 0xe2 },	/* U+00AC -> U+FFE2 */
+		{ 0x20, 0x16, 0x22, 0x25 },	/* U+2016 -> U+2225 */
+		{ 0x22, 0x12, 0xff, 0x0d },	/* U+2212 -> U+FF0D */
+		{ 0x30, 0x1c, 0xff, 0x5e }	/* U+301C -> U+FF5E */
+	};
+
+	guchar *ucs = (guchar *)ucs_src;
+	gsize i, k;
+
+	g_return_if_fail(ucs_src != NULL);
+	g_return_if_fail(len > 0);
+
+	/* "i + 1 < len" restricts the walk to complete 2-byte units, so an odd
+	 * len can no longer make the second-byte access run past the buffer.
+	 * The index is a gsize to match len (no signed/unsigned comparison). */
+	for(i = 0; i + 1 < len; i += 2) {
+		for(k = 0; k < sizeof(map) / sizeof(map[0]); k++) {
+			if(ucs[i] != map[k][0] || ucs[i + 1] != map[k][1])
+				continue;
+
+			ucs[i] = map[k][2];
+			ucs[i + 1] = map[k][3];
+			break;	/* a unit is replaced at most once */
+		}
+	}
+}
+#endif
+
+/*
+ * sanitize_ucs - rewrite selected 2-byte units of a buffer in place.
+ *
+ * The buffer is walked two bytes at a time and a unit is replaced when its
+ * (first, second) byte pair matches a table entry.  Read as big-endian
+ * UCS-2 (NOTE(review): byte order is assumed, confirm with callers) the
+ * replacements are U+2225->U+2016, U+FF0D->U+2212, U+FF5E->U+301C,
+ * U+FFE0->U+00A2, U+FFE1->U+00A3 and U+FFE2->U+00AC.
+ *
+ * ucs_src: buffer to modify; must not be NULL.
+ * len:     size of the buffer in bytes; must be greater than zero.  If it
+ *          is odd, the final byte is left untouched.
+ *
+ * When _WIN32 is defined the function does nothing.
+ */
+#ifdef _WIN32
+void sanitize_ucs(gchar *ucs, gsize len)
+{
+	/* no operation */
+}
+#else
+void sanitize_ucs(gchar *ucs_src, gsize len)
+{
+	/* { first byte, second byte, new first byte, new second byte } */
+	static const guchar map[][4] = {
+		{ 0x22, 0x25, 0x20, 0x16 },	/* U+2225 -> U+2016 */
+		{ 0xff, 0x0d, 0x22, 0x12 },	/* U+FF0D -> U+2212 */
+		{ 0xff, 0x5e, 0x30, 0x1c },	/* U+FF5E -> U+301C */
+		{ 0xff, 0xe0, 0x00, 0xa2 },	/* U+FFE0 -> U+00A2 */
+		{ 0xff, 0xe1, 0x00, 0xa3 },	/* U+FFE1 -> U+00A3 */
+		{ 0xff, 0xe2, 0x00, 0xac }	/* U+FFE2 -> U+00AC */
+	};
+	guchar *ucs = (guchar *)ucs_src;
+	gsize i, k;
+
+	g_return_if_fail(ucs_src != NULL);
+	g_return_if_fail(len > 0);
+
+	/* "i + 1 < len" restricts the walk to complete 2-byte units, so an odd
+	 * len can no longer make the second-byte access run past the buffer.
+	 * The index is a gsize to match len (no signed/unsigned comparison). */
+	for(i = 0; i + 1 < len; i += 2) {
+		for(k = 0; k < sizeof(map) / sizeof(map[0]); k++) {
+			if(ucs[i] != map[k][0] || ucs[i + 1] != map[k][1])
+				continue;
+			ucs[i] = map[k][2];
+			ucs[i + 1] = map[k][3];
+			break;	/* a unit is replaced at most once */
+		}
+	}
+}
+#endif
+
+/*
+ * sanitize_utf - return a copy of a UTF-8 string with selected 3-byte
+ * sequences rewritten.
+ *
+ * Replacements made (UTF-8 bytes, with the code points they encode):
+ *   e2 88 a5 (U+2225) -> e2 80 96 (U+2016)
+ *   ef bc 8d (U+FF0D) -> e2 88 92 (U+2212)
+ *   ef bd 9e (U+FF5E) -> e3 80 9c (U+301C)
+ *   ef bf a0 (U+FFE0) -> c2 a2    (U+00A2)
+ *   ef bf a1 (U+FFE1) -> c2 a3    (U+00A3)
+ *   ef bf a2 (U+FFE2) -> c2 ac    (U+00AC)
+ * When _WIN32 is defined no replacement is made and a plain copy is
+ * returned.
+ *
+ * msg:    text to copy; must not be NULL.
+ * len:    number of bytes of msg to use, or -1 to use strlen(msg).  The
+ *         resulting length must be greater than zero.
+ * newlen: if not NULL, receives the byte length of the returned string
+ *         (terminator not counted).
+ *
+ * Returns a NUL-terminated string allocated with g_strndup(), or NULL when
+ * an argument check fails.
+ */
+#ifdef _WIN32
+gchar *sanitize_utf(const gchar *msg, gsize len, gsize *newlen)
+{
+	g_return_val_if_fail(msg != NULL, NULL);
+	if(len == (gsize)-1)
+		len = strlen(msg);
+	g_return_val_if_fail(len > 0, NULL);
+
+	if(newlen)
+		*newlen = len;
+
+	return g_strndup(msg, len);
+}
+#else
+gchar *sanitize_utf(const gchar *msg, gsize len, gsize *newlen)
+{
+	static const struct {
+		guchar from[3];	/* sequence to look for */
+		guchar to[3];	/* replacement bytes */
+		gsize to_len;	/* number of bytes of "to" that are used */
+	} map[] = {
+		{ { 0xe2, 0x88, 0xa5 }, { 0xe2, 0x80, 0x96 }, 3 },
+		{ { 0xef, 0xbc, 0x8d }, { 0xe2, 0x88, 0x92 }, 3 },
+		{ { 0xef, 0xbd, 0x9e }, { 0xe3, 0x80, 0x9c }, 3 },
+		{ { 0xef, 0xbf, 0xa0 }, { 0xc2, 0xa2, 0x00 }, 2 },
+		{ { 0xef, 0xbf, 0xa1 }, { 0xc2, 0xa3, 0x00 }, 2 },
+		{ { 0xef, 0xbf, 0xa2 }, { 0xc2, 0xac, 0x00 }, 2 }
+	};
+	guchar *utf;
+	gsize bytes;	/* current length of the text held in utf */
+	gsize i, k;
+
+	g_return_val_if_fail(msg != NULL, NULL);
+	if(len == (gsize)-1)
+		len = strlen(msg);
+	g_return_val_if_fail(len > 0, NULL);
+
+	/* The copy is NUL-terminated; utf[bytes] == 0 is kept true below. */
+	utf = (guchar *)g_strndup(msg, len);
+	bytes = len;
+
+	/*
+	 * Scan only the live text: the bound is the current length, not the
+	 * original one, so bytes left behind the shortened text are never
+	 * examined, and "i + 2 < bytes" keeps every 3-byte comparison inside
+	 * it.  The index is a gsize to match the length it is compared with.
+	 */
+	for(i = 0; i + 2 < bytes; i++) {
+		for(k = 0; k < sizeof(map) / sizeof(map[0]); k++) {
+			if(memcmp(utf + i, map[k].from, 3) != 0)
+				continue;
+
+			memcpy(utf + i, map[k].to, map[k].to_len);
+			if(map[k].to_len == 2) {
+				/* close the one-byte gap; the count includes
+				 * the terminator at utf[bytes] */
+				memmove(utf + i + 2, utf + i + 3, bytes - i - 2);
+				bytes--;
+			}
+			break;
+		}
+	}
+	utf[bytes] = 0x00; /* terminate */
+	if(newlen)
+		*newlen = bytes;
+	return (gchar *)utf;
+}
+#endif
+
+/*
+ * botch_utf - return a copy of a UTF-8 string with selected sequences
+ * rewritten.
+ *
+ * Replacements made (UTF-8 bytes, with the code points they encode):
+ *   c2 a2    (U+00A2) -> ef bf a0 (U+FFE0)
+ *   c2 a3    (U+00A3) -> ef bf a1 (U+FFE1)
+ *   c2 ac    (U+00AC) -> ef bf a2 (U+FFE2)
+ *   e2 80 96 (U+2016) -> e2 88 a5 (U+2225)
+ *   e2 88 92 (U+2212) -> ef bc 8d (U+FF0D)
+ *   e3 80 9c (U+301C) -> ef bd 9e (U+FF5E)
+ * When _WIN32 is defined no replacement is made and a plain copy is
+ * returned.
+ *
+ * msg:    text to copy; must not be NULL.
+ * len:    number of bytes of msg to use, or -1 to use strlen(msg).  The
+ *         resulting length must be greater than zero.
+ * newlen: if not NULL, receives the byte length of the returned string
+ *         (terminator not counted).
+ *
+ * Returns a NUL-terminated string allocated with g_malloc0() (g_strndup()
+ * on _WIN32), or NULL when an argument check fails.
+ */
+#ifdef _WIN32
+gchar *botch_utf(const gchar *msg, gsize len, gsize *newlen)
+{
+	g_return_val_if_fail(msg != NULL, NULL);
+	if(len == (gsize)-1)
+		len = strlen(msg);
+	g_return_val_if_fail(len > 0, NULL);
+
+	if(newlen)
+		*newlen = len;
+
+	return g_strndup(msg, len);
+}
+#else
+gchar *botch_utf(const gchar *msg, gsize len, gsize *newlen)
+{
+	static const struct {
+		gsize from_len;	/* number of bytes of "from" that are used */
+		guchar from[3];	/* sequence to look for */
+		guchar to[3];	/* replacement, always 3 bytes */
+	} map[] = {
+		{ 2, { 0xc2, 0xa2, 0x00 }, { 0xef, 0xbf, 0xa0 } },
+		{ 2, { 0xc2, 0xa3, 0x00 }, { 0xef, 0xbf, 0xa1 } },
+		{ 2, { 0xc2, 0xac, 0x00 }, { 0xef, 0xbf, 0xa2 } },
+		{ 3, { 0xe2, 0x80, 0x96 }, { 0xe2, 0x88, 0xa5 } },
+		/* U+2212: it is the third byte (0x92) that identifies it */
+		{ 3, { 0xe2, 0x88, 0x92 }, { 0xef, 0xbc, 0x8d } },
+		{ 3, { 0xe3, 0x80, 0x9c }, { 0xef, 0xbd, 0x9e } }
+	};
+	guchar *utf;
+	gsize bytes;	/* current length of the text held in utf */
+	gsize i, k;
+
+	g_return_val_if_fail(msg != NULL, NULL);
+	if(len == (gsize)-1)
+		len = strlen(msg);
+	g_return_val_if_fail(len > 0, NULL);
+
+	/* Only 2-byte sequences grow, by one byte each, so the result needs at
+	 * most len + len / 2 bytes plus the terminator.  The buffer is zeroed,
+	 * so everything after the text stays 0. */
+	utf = g_malloc0(len + len / 2 + 1);
+	memcpy(utf, msg, len);
+	bytes = len;
+
+	for(i = 0; i < bytes; i++) {
+		for(k = 0; k < sizeof(map) / sizeof(map[0]); k++) {
+			gsize n = map[k].from_len;
+
+			/* never compare past the end of the text */
+			if(i + n > bytes || memcmp(utf + i, map[k].from, n) != 0)
+				continue;
+
+			if(n == 2) {
+				/* open a one-byte gap for the third byte */
+				memmove(utf + i + 3, utf + i + 2, bytes - i - 2);
+				bytes++;
+			}
+			memcpy(utf + i, map[k].to, 3);
+			break;
+		}
+	}
+	utf[bytes] = 0x00; /* terminate */
+	if(newlen)
+		*newlen = bytes;
+	return (gchar *)utf;
+}
+#endif