Mercurial > pidgin.yaz

--- a/libpurple/protocols/oscar/encoding.c	Thu Oct 14 14:53:35 2010 +0900
+++ b/libpurple/protocols/oscar/encoding.c	Sat Oct 16 21:05:34 2010 +0900
@@ -51,15 +51,34 @@
 	gchar *utf8 = NULL;
 	const gchar *glib_encoding = NULL;
 	gchar *extracted_encoding = encoding_extract(encoding);
-
+
 	if (extracted_encoding == NULL || *extracted_encoding == '\0') {
-		purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n");
+		purple_debug_info("yaz oscar", "Empty encoding, validate as UTF-8\n");
+		if(g_utf8_validate(text, textlen, NULL)){
+			gsize newlen;
+			utf8 = sanitize_utf(text, textlen, &newlen);
+			goto done;
+		}
+		/* Not valid UTF-8: fall back to treating the text as UTF-16BE. */
+		purple_debug_info("yaz oscar", "Empty encoding, assuming UTF-16BE\n");
+		sanitize_ucs((gchar *)text, textlen);
+		utf8 = g_convert(text, textlen, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
+		if(utf8){
+			if(!g_utf8_validate(utf8, strlen(utf8), NULL)){
+				purple_debug_info("yaz oscar", "Invalid conversion\n");
+				g_free(utf8);
+				utf8 = NULL;
+			}
+		} else {
+			purple_debug_info("yaz oscar", "Conversion failed\n");
+		}
 	} else if (!g_ascii_strcasecmp(extracted_encoding, "iso-8859-1")) {
 		glib_encoding = "iso-8859-1";
 	} else if (!g_ascii_strcasecmp(extracted_encoding, "ISO-8859-1-Windows-3.1-Latin-1") || !g_ascii_strcasecmp(extracted_encoding, "us-ascii")) {
 		glib_encoding = "Windows-1252";
 	} else if (!g_ascii_strcasecmp(extracted_encoding, "unicode-2-0")) {
 		glib_encoding = "UTF-16BE";
+		sanitize_ucs((gchar *)text, textlen);
 	} else if (g_ascii_strcasecmp(extracted_encoding, "utf-8")) {
 		purple_debug_warning("oscar", "Unrecognized character encoding \"%s\", attempting to convert to UTF-8 anyway\n", extracted_encoding);
 		glib_encoding = extracted_encoding;
@@ -82,6 +101,7 @@
 			utf8 = g_strndup(text, textlen);
 	}

+done:
 	g_free(extracted_encoding);
 	return utf8;
 }
@@ -113,7 +133,7 @@
 static gchar *
 oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback)
 {
-	gchar *ret = NULL;
+	gchar *ret = NULL, *ret2 = NULL;
 	GError *err = NULL;

 	if ((charsetstr == NULL) || (*charsetstr == '\0'))
@@ -136,7 +156,9 @@
 			purple_debug_warning("oscar", "String is not valid UTF-8.\n");
 	}

-	return ret;
+	ret2 = sanitize_utf(ret, -1, NULL);
+	g_free(ret);
+	return ret2;
 }

 gchar *
@@ -161,13 +183,14 @@
 	} else if (charset == AIM_CHARSET_ASCII) {
 		/* Should just be "ASCII" */
 		charsetstr1 = "ASCII";
-		charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
-	} else if (charset == 0x000d) {
+		charsetstr2 = "UTF-8";
+        charsetstr3 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
+	} else if (charset == AIM_CHARSET_QUIRKUTF8) {
 		/* iChat sending unicode over a Direct IM connection = UTF-8 */
-		/* Mobile AIM client on multiple devices (including Blackberry Tour, Nokia 3100, and LG VX6000) = ISO-8859-1 */
-		charsetstr1 = "UTF-8";
-		charsetstr2 = "ISO-8859-1";
-		charsetstr3 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
+        /* Mobile AIM client on a Nokia 3100 and an LG VX6000 = ISO-8859-1 */
+        charsetstr1 = "UTF-8";
+        charsetstr2 = "ISO-8859-1";
+        charsetstr3 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
 	} else {
 		/* Unknown, hope for valid UTF-8... */
 		charsetstr1 = "UTF-8";
--- a/libpurple/protocols/oscar/oscar.c	Thu Oct 14 14:53:35 2010 +0900
+++ b/libpurple/protocols/oscar/oscar.c	Sat Oct 16 21:05:34 2010 +0900
@@ -1812,7 +1812,7 @@
 	 * for this suck-ass part of the protocol by splitting the string into at
 	 * most 1 baby string.
 	 */
-	msg1 = g_strsplit(args->msg, "\376", (args->type == 0x01 ? 1 : 0));
+	msg1 = g_strsplit(args->msg, "\376", (args->type == 0x01 ? 1 : 0)); // \376 is 0xfe
 	for (numtoks=0; msg1[numtoks]; numtoks++);
 	msg2 = (gchar **)g_malloc((numtoks+1)*sizeof(gchar *));
 	for (i=0; msg1[i]; i++) {
@@ -3069,8 +3069,6 @@
 {
 	GString *msg;
 	GString *data;
-	gchar *tmp;
-	gsize tmplen;
 	guint16 charset;
 	GData *attribs;
 	const char *start, *end, *last;
@@ -3131,11 +3129,9 @@

 	g_string_append(msg, "</BODY></HTML>");

-	/* Convert the message to a good encoding */
-	tmp = oscar_encode_im(msg->str, &tmplen, &charset, NULL);
-	g_string_free(msg, TRUE);
-	msg = g_string_new_len(tmp, tmplen);
-	g_free(tmp);
+	/* iChat and AIM6 use charset 0x000d to send UTF-8.
+       Moreover, AIM6 insists on UTF-8 only! --yaz */
+    charset = AIM_CHARSET_QUIRKUTF8;

 	/* Append any binary data that we may have */
 	if (oscar_id) {
@@ -3273,10 +3269,14 @@
 			/* Messaging an SMS (mobile) user--strip HTML */
 			tmp2 = purple_markup_strip_html(tmp1);
 			is_html = FALSE;
-		} else {
+		} else if (od->icq) {
 			/* ICQ 6 wants its HTML wrapped in these tags. Oblige it. */
 			tmp2 = g_strdup_printf("<HTML><BODY>%s</BODY></HTML>", tmp1);
 			is_html = TRUE;
+		} else {
+			/* Not ICQ: send the text as-is, without the HTML wrapper. */
+			tmp2 = g_strdup(tmp1);
+			is_html = FALSE;
 		}
 		g_free(tmp1);
 		tmp1 = tmp2;
@@ -4457,7 +4457,7 @@
 				message, buf2);
 	}

-	aim_chat_send_im(od, c->conn, 0, buf2, len, charsetstr, "en");
+	aim_chat_send_im(od, c->conn, 0, buf2, len, charsetstr, "JA");
 	g_free(buf2);
 	g_free(buf);