# HG changeset patch
# User Luke Schierer <lschiere@pidgin.im>
# Date 1080886694 0
# Node ID 2d4ccd94e298a4c4cf92de4fab510d97b8b7c3d7
# Parent  832fd9b754d0a78c1ef89434a7f034353f48de99
[gaim-migrate @ 9305]
"In the IRC tooltip, there's a line "Channel:". In 0.75,
this seems to have been merged with the "_Channel:" line.

In English, this works because underscores in the
tooltip are removed before being displayed. However, in
Chinese and Japanese, the translation of "_Channel:"
looks like "Channel (_C):" and this translated text
does not make any sense in the tooltip.

The tooltip thus should not use the "_Channel:" string.
Otherwise the tooltip output would look very strange in
certain locales (at least in Chinese and Japanese)." --Ambrose C. LI

who continues:
"This second patch should be better. It correctly undoes the
space character typically present before the left
parenthesis, and adds some checks so that it should not
corrupt multibyte UTF-8 characters.

However, this has not been tested a lot. UTF8 handling is
also not an area I am familiar with.

I don't know whether the C library has existing functions to
handle the utf8 things."

I'm assuming we have time to test this before 0.77.

committer: Tailor Script <tailor@pidgin.im>

diff -r 832fd9b754d0 -r 2d4ccd94e298 src/util.c
--- a/src/util.c	Fri Apr 02 06:06:45 2004 +0000
+++ b/src/util.c	Fri Apr 02 06:18:14 2004 +0000
@@ -2475,6 +2475,7 @@
 {
 	char *out;
 	char *a;
+	char *a0;
 	const char *b;
 
 	g_return_val_if_fail(in != NULL, NULL);
@@ -2483,16 +2484,47 @@
 	a = out;
 	b = in;
 
+	a0 = a; /* The last non-space char seen so far, or the first char */
+
 	while(*b) {
 		if(*b == '_') {
-			if(*(b+1) == '_') {
+			if(a > out && b > in && *(b-1) == '(' && *(b+1) && !(*(b+1) & 0x80) && *(b+2) == ')') {
+				/* Detected CJK style shortcut (Bug 875311) */
+				a = a0;	/* undo the left parenthesis */
+				b += 3;	/* and skip the whole mess */
+			} else if(*(b+1) == '_') {
 				*(a++) = '_';
 				b += 2;
+				a0 = a;
 			} else {
 				b++;
 			}
+		/* We don't want to corrupt the middle of UTF-8 characters */
+		} else if (!(*b & 0x80)) {	/* other 1-byte char */
+			if (*b != ' ')
+				a0 = a;
+			*(a++) = *(b++);
 		} else {
-			*(a++) = *(b++);
+			/* Multibyte utf8 char, don't look for _ inside these */
+			int n = 0;
+			int i;
+			if ((*b & 0xe0) == 0xc0) {
+				n = 2;
+			} else if ((*b & 0xf0) == 0xe0) {
+				n = 3;
+			} else if ((*b & 0xf8) == 0xf0) {
+				n = 4;
+			} else if ((*b & 0xfc) == 0xf8) {
+				n = 5;
+			} else if ((*b & 0xfe) == 0xfc) {
+				n = 6;
+			} else {		/* Illegal utf8 */
+				n = 1;
+			}
+			a0 = a; /* unless we want to delete CJK spaces too */
+			for (i = 0; i < n && *b; i += 1) {
+				*(a++) = *(b++);
+			}
 		}
 	}
 	*a = '\0';