# HG changeset patch
# User Ethan Blanton
# Date 1101278387 0
# Node ID 357d4fa1bfbe31e5df5e22ba816f2284a57a0b23
# Parent b98f856e2e5e3d9917a7798b6fae4ae837b5db83
[gaim-migrate @ 11400]
This is the IRC fallback encoding patch and gaim_utf8_salvage function that
just hit oldstatus. If CVS didn't suck, I wouldn't have to generate two
commits for this. :-P

committer: Tailor Script

diff -r b98f856e2e5e -r 357d4fa1bfbe src/protocols/irc/irc.c
--- a/src/protocols/irc/irc.c	Wed Nov 24 02:16:36 2004 +0000
+++ b/src/protocols/irc/irc.c	Wed Nov 24 06:39:47 2004 +0000
@@ -697,7 +697,7 @@
 	option = gaim_account_option_int_new(_("Port"), "port", IRC_DEFAULT_PORT);
 	prpl_info.protocol_options = g_list_append(prpl_info.protocol_options, option);
 
-	option = gaim_account_option_string_new(_("Encoding"), "encoding", IRC_DEFAULT_CHARSET);
+	option = gaim_account_option_string_new(_("Encodings"), "encoding", IRC_DEFAULT_CHARSET);
 	prpl_info.protocol_options = g_list_append(prpl_info.protocol_options, option);
 
 	option = gaim_account_option_string_new(_("Username"), "username", "");
diff -r b98f856e2e5e -r 357d4fa1bfbe src/protocols/irc/parse.c
--- a/src/protocols/irc/parse.c	Wed Nov 24 02:16:36 2004 +0000
+++ b/src/protocols/irc/parse.c	Wed Nov 24 06:39:47 2004 +0000
@@ -26,6 +26,7 @@
 #include "conversation.h"
 #include "notify.h"
 #include "debug.h"
+#include "util.h"
 #include "cmds.h"
 #include "irc.h"
 
@@ -201,47 +202,62 @@
 {
 	char *utf8;
 	GError *err = NULL;
-	const gchar *charset;
+	gchar **encodings;
+	const gchar *enclist;
 
-	charset = gaim_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
-	if (!strcasecmp("UTF-8", charset))
+	/* Only the first listed encoding is used for outgoing text. */
+	enclist = gaim_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
+	encodings = g_strsplit(enclist, ",", 2);
+
+	if (encodings[0] == NULL || !strcasecmp("UTF-8", encodings[0])) {
+		g_strfreev(encodings);
 		return g_strdup(string);
+	}
 
-	utf8 = g_convert(string, strlen(string), charset, "UTF-8", NULL, NULL, &err);
+	utf8 = g_convert(string, strlen(string), encodings[0], "UTF-8", NULL, NULL, &err);
 	if (err) {
 		gaim_debug(GAIM_DEBUG_ERROR, "irc", "Send conversion error: %s\n", err->message);
-		gaim_debug(GAIM_DEBUG_ERROR, "irc", "Sending as UTF-8 instead of %s\n", charset);
+		gaim_debug(GAIM_DEBUG_ERROR, "irc", "Sending as UTF-8 instead of %s\n", encodings[0]);
 		utf8 = g_strdup(string);
 		g_error_free(err);
 	}
-	
+	g_strfreev(encodings);
+
 	return utf8;
 }
 
 static char *irc_recv_convert(struct irc_conn *irc, const char *string)
 {
 	char *utf8 = NULL;
-	GError *err = NULL;
-	const gchar *charset;
+	const gchar *charset, *enclist;
+	gchar **encodings;
+	int i;
 
-	charset = gaim_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
+	enclist = gaim_account_get_string(irc->account, "encoding", IRC_DEFAULT_CHARSET);
+	encodings = g_strsplit(enclist, ",", -1);
 
-	if (!strcasecmp("UTF-8", charset)) {
-		if (g_utf8_validate(string, strlen(string), NULL))
-			utf8 = g_strdup(string);
-	} else {
-		utf8 = g_convert(string, strlen(string), "UTF-8", charset, NULL, NULL, &err);
+	/* Try each listed encoding in order; an empty list skips the loop. */
+	for (i = 0; encodings[i] != NULL; i++) {
+		charset = encodings[i];
+		while (*charset == ' ')
+			charset++;
+
+		if (!strcasecmp("UTF-8", charset)) {
+			if (g_utf8_validate(string, strlen(string), NULL))
+				utf8 = g_strdup(string);
+		} else {
+			utf8 = g_convert(string, strlen(string), "UTF-8", charset, NULL, NULL, NULL);
+		}
+
+		if (utf8) {
+			g_strfreev(encodings);
+			return utf8;
+		}
 	}
 
-	if (err) {
-		gaim_debug(GAIM_DEBUG_ERROR, "irc", "recv conversion error: %s\n", err->message);
-		g_error_free(err);
-	}
-
-	if (utf8 == NULL)
-		utf8 = g_strdup(_("(There was an error converting this message. Check the 'Encoding' option in the Account Editor)"));
-
-	return utf8;
+	/* Nothing matched: free the list before falling back to salvage. */
+	g_strfreev(encodings);
+	return gaim_utf8_salvage(string);
 }
 
 /* XXX tag closings are not necessarily correctly nested here!  If we
diff -r b98f856e2e5e -r 357d4fa1bfbe src/util.c
--- a/src/util.c	Wed Nov 24 02:16:36 2004 +0000
+++ b/src/util.c	Wed Nov 24 06:39:47 2004 +0000
@@ -3057,6 +3057,38 @@
 	return NULL;
 }
 
+/* True when byte x can begin a UTF-8 sequence: ASCII (0xxxxxxx) or the
+ * lead byte of a 2-, 3- or 4-byte sequence (110xxxxx, 1110xxxx, 11110xxx). */
+#define utf8_first(x) (((x) & 0x80) == 0 || ((x) & 0xe0) == 0xc0 \
+		       || ((x) & 0xf0) == 0xe0 || ((x) & 0xf8) == 0xf0)
+gchar *
+gaim_utf8_salvage(const char *str)
+{
+	GString *workstr;
+	const char *end;
+
+	g_return_val_if_fail(str != NULL, NULL);
+
+	workstr = g_string_sized_new(strlen(str));
+
+	do {
+		/* Copy the longest valid prefix; end stops at the first bad byte. */
+		g_utf8_validate(str, -1, &end);
+		workstr = g_string_append_len(workstr, str, end - str);
+		str = end;
+		if (*str == '\0')
+			break;
+		/* Emit one '?' per byte skipped until a possible sequence start
+		 * (or the terminating NUL, which also satisfies utf8_first). */
+		do {
+			workstr = g_string_append_c(workstr, '?');
+			str++;
+		} while (!utf8_first(*str));
+	} while (*str != '\0');
+
+	return g_string_free(workstr, FALSE);
+}
+
 char *
 gaim_utf8_ncr_decode(const char *in)
 {
diff -r b98f856e2e5e -r 357d4fa1bfbe src/util.h
--- a/src/util.h	Wed Nov 24 02:16:36 2004 +0000
+++ b/src/util.h	Wed Nov 24 06:39:47 2004 +0000
@@ -693,6 +693,17 @@
 gchar *gaim_utf8_try_convert(const char *str);
 
 /**
+ * Salvages the valid UTF-8 characters from a string, replacing any
+ * invalid characters with a filler character (currently hardcoded to
+ * '?').
+ *
+ * @param str The source string.
+ *
+ * @return A newly allocated, valid UTF-8 string; free it with g_free().
+ */
+gchar *gaim_utf8_salvage(const char *str);
+
+/**
  * Compares two UTF-8 strings.
  *
  * @param a The first string.