changeset 10258:357d4fa1bfbe

[gaim-migrate @ 11400] This is the IRC fallback encoding patch and gaim_utf8_salvage function that just hit oldstatus. If CVS didn't suck, I wouldn't have to generate two commits for this. :-P committer: Tailor Script <tailor@pidgin.im>
author Ethan Blanton <elb@pidgin.im>
date Wed, 24 Nov 2004 06:39:47 +0000
parents b98f856e2e5e
children 1aa83016fbe0
files src/protocols/irc/irc.c src/protocols/irc/parse.c src/util.c src/util.h
diffstat 4 files changed, 75 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/src/protocols/irc/irc.c	Wed Nov 24 02:16:36 2004 +0000
+++ b/src/protocols/irc/irc.c	Wed Nov 24 06:39:47 2004 +0000
@@ -697,7 +697,7 @@
 	option = gaim_account_option_int_new(_("Port"), "port", IRC_DEFAULT_PORT);
 	prpl_info.protocol_options = g_list_append(prpl_info.protocol_options, option);
 
-	option = gaim_account_option_string_new(_("Encoding"), "encoding", IRC_DEFAULT_CHARSET);
+	option = gaim_account_option_string_new(_("Encodings"), "encoding", IRC_DEFAULT_CHARSET);
 	prpl_info.protocol_options = g_list_append(prpl_info.protocol_options, option);
 
 	option = gaim_account_option_string_new(_("Username"), "username", "");
--- a/src/protocols/irc/parse.c	Wed Nov 24 02:16:36 2004 +0000
+++ b/src/protocols/irc/parse.c	Wed Nov 24 06:39:47 2004 +0000
@@ -26,6 +26,7 @@
 #include "conversation.h"
 #include "notify.h"
 #include "debug.h"
+#include "util.h"
 #include "cmds.h"
 #include "irc.h"
 
@@ -201,47 +202,59 @@
 {
 	char *utf8;
 	GError *err = NULL;
-	const gchar *charset;
+	gchar **encodings;
+	const gchar *enclist;
 
-	charset = gaim_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
-	if (!strcasecmp("UTF-8", charset))
+	enclist = gaim_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
+	encodings = g_strsplit(enclist, ",", 2);
+
+	if (encodings[0] == NULL || !strcasecmp("UTF-8", encodings[0]))
 		return g_strdup(string);
 
-	utf8 = g_convert(string, strlen(string), charset, "UTF-8", NULL, NULL, &err);
+	utf8 = g_convert(string, strlen(string), encodings[0], "UTF-8", NULL, NULL, &err);
 	if (err) {
 		gaim_debug(GAIM_DEBUG_ERROR, "irc", "Send conversion error: %s\n", err->message);
-		gaim_debug(GAIM_DEBUG_ERROR, "irc", "Sending as UTF-8 instead of %s\n", charset);
+		gaim_debug(GAIM_DEBUG_ERROR, "irc", "Sending as UTF-8 instead of %s\n", encodings[0]);
 		utf8 = g_strdup(string);
 		g_error_free(err);
 	}
-	
+	g_strfreev(encodings);
+
 	return utf8;
 }
 
 static char *irc_recv_convert(struct irc_conn *irc, const char *string)
 {
 	char *utf8 = NULL;
-	GError *err = NULL;
-	const gchar *charset;
+	const gchar *charset, *enclist;
+	gchar **encodings;
+	int i;
 
-	charset = gaim_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
+	enclist = gaim_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
+	encodings = g_strsplit(enclist, ",", -1);
+
+	if (encodings[0] == NULL)
+		return gaim_utf8_salvage(string);
 
-	if (!strcasecmp("UTF-8", charset)) {
-		if (g_utf8_validate(string, strlen(string), NULL))
-			utf8 = g_strdup(string);
-	} else {
-		utf8 = g_convert(string, strlen(string), "UTF-8", charset, NULL, NULL, &err);
+	for (i = 0; encodings[i] != NULL; i++) {
+		charset = encodings[i];
+		while (*charset == ' ')
+			charset++;
+
+		if (!strcasecmp("UTF-8", charset)) {
+			if (g_utf8_validate(string, strlen(string), NULL))
+				utf8 = g_strdup(string);
+		} else {
+			utf8 = g_convert(string, strlen(string), "UTF-8", charset, NULL, NULL, NULL);
+		}
+
+		if (utf8) {
+			g_strfreev(encodings);
+			return utf8;
+		}
 	}
 
-	if (err) {
-		gaim_debug(GAIM_DEBUG_ERROR, "irc", "recv conversion error: %s\n", err->message);
-		g_error_free(err);
-	}
-
-	if (utf8 == NULL)
-		utf8 = g_strdup(_("(There was an error converting this message.  Check the 'Encoding' option in the Account Editor)"));
-
-	return utf8;
+	return gaim_utf8_salvage(string);
 }
 
 /* XXX tag closings are not necessarily correctly nested here!  If we
--- a/src/util.c	Wed Nov 24 02:16:36 2004 +0000
+++ b/src/util.c	Wed Nov 24 06:39:47 2004 +0000
@@ -3057,6 +3057,33 @@
 	return NULL;
 }
 
+/* True if byte x can begin a UTF-8 character: 0xxxxxxx, 110xxxxx, 1110xxxx or 11110xxx. */
+#define utf8_first(x) (((x) & 0x80) == 0 || ((x) & 0xe0) == 0xc0 \
+		       || ((x) & 0xf0) == 0xe0 || ((x) & 0xf8) == 0xf0)
+gchar *
+gaim_utf8_salvage(const char *str)
+{
+	GString *workstr;
+	const char *end;
+
+	g_return_val_if_fail(str != NULL, NULL);
+	workstr = g_string_sized_new(strlen(str));
+	do {
+		/* Keep the valid run; end is left at the first bad byte or the NUL. */
+		g_utf8_validate(str, -1, &end);
+		workstr = g_string_append_len(workstr, str, end - str);
+		str = end;
+		if (*str == '\0')
+			break;
+		/* One '?' per byte, up to the next byte that can start a character. */
+		do {
+			workstr = g_string_append_c(workstr, '?');
+			str++;
+		} while (!utf8_first(*str));
+	} while (*str != '\0');
+	return g_string_free(workstr, FALSE);
+}
+
 char *
 gaim_utf8_ncr_decode(const char *in)
 {
--- a/src/util.h	Wed Nov 24 02:16:36 2004 +0000
+++ b/src/util.h	Wed Nov 24 06:39:47 2004 +0000
@@ -693,6 +693,17 @@
 gchar *gaim_utf8_try_convert(const char *str);
 
 /**
+ * Salvages the valid UTF-8 characters from a string, replacing every
+ * byte it has to drop with a filler character (currently hardcoded
+ * to '?').
+ *
+ * @param str The source string.  Must not be NULL.
+ *
+ * @return A newly allocated valid UTF-8 string to g_free(); NULL if str is NULL.
+ */
+gchar *gaim_utf8_salvage(const char *str);
+
+/**
  * Compares two UTF-8 strings.
  *
  * @param a The first string.