comparison libpurple/protocols/oscar/encoding.c @ 30386:ca90b6c27eb8

Refactored oscar_encoding_to_utf8(). 1. Removed elb's hack from #1645. It no longer appears to be necessary, since the recent official clients (6.5, 7.1) no longer misbehave that way. 2. Simplified the logic in incomingim_chan2(). 3. Removed all NULL-return checks on oscar_encoding_to_utf8(), because it now always returns a non-NULL value.
author ivan.komarov@soc.pidgin.im
date Wed, 28 Jul 2010 16:30:04 +0000
parents 9d386bf63eab
children 5661f30d1b8e
comparison
equal deleted inserted replaced
30385:9d386bf63eab 30386:ca90b6c27eb8
31 } 31 }
32 } 32 }
33 return AIM_CHARSET_ASCII; 33 return AIM_CHARSET_ASCII;
34 } 34 }
35 35
36 gchar * 36 static gchar *
37 oscar_encoding_extract(const char *encoding) 37 encoding_extract(const char *encoding)
38 { 38 {
39 gchar *ret = NULL;
40 char *begin, *end; 39 char *begin, *end;
41 40
42 g_return_val_if_fail(encoding != NULL, NULL); 41 g_return_val_if_fail(encoding != NULL, NULL);
43 42
44 /* Make sure encoding begins with charset= */ 43 if (!g_str_has_prefix(encoding, "text/aolrtf; charset=") &&
45 if (strncmp(encoding, "text/aolrtf; charset=", 21) && 44 !g_str_has_prefix(encoding, "text/x-aolrtf; charset=") &&
46 strncmp(encoding, "text/x-aolrtf; charset=", 23) && 45 !g_str_has_prefix(encoding, "text/plain; charset=")) {
47 strncmp(encoding, "text/plain; charset=", 20)) 46 return g_strdup(encoding);
48 {
49 return NULL;
50 } 47 }
51 48
52 begin = strchr(encoding, '"'); 49 begin = strchr(encoding, '"');
53 end = strrchr(encoding, '"'); 50 end = strrchr(encoding, '"');
54 51
55 if ((begin == NULL) || (end == NULL) || (begin >= end)) 52 if ((begin == NULL) || (end == NULL) || (begin >= end)) {
56 return NULL; 53 return g_strdup(encoding);
57 54 }
58 ret = g_strndup(begin+1, (end-1) - begin); 55
59 56 return g_strndup(begin+1, (end-1) - begin);
60 return ret; 57 }
61 } 58
62 59 gchar *
63 gchar * 60 oscar_encoding_to_utf8(const char *encoding, const char *text, int textlen)
64 oscar_encoding_to_utf8(PurpleAccount *account, const char *encoding, const char *text, int textlen)
65 { 61 {
66 gchar *utf8 = NULL; 62 gchar *utf8 = NULL;
67 63 const gchar *glib_encoding = NULL;
68 if ((encoding == NULL) || encoding[0] == '\0') { 64 gchar *extracted_encoding = encoding_extract(encoding);
65
66 if (extracted_encoding == NULL || *extracted_encoding == '\0') {
69 purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n"); 67 purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n");
70 } else if (!g_ascii_strcasecmp(encoding, "iso-8859-1")) { 68 } else if (!g_ascii_strcasecmp(extracted_encoding, "iso-8859-1")) {
71 utf8 = g_convert(text, textlen, "UTF-8", "iso-8859-1", NULL, NULL, NULL); 69 glib_encoding = "iso-8859-1";
72 } else if (!g_ascii_strcasecmp(encoding, "ISO-8859-1-Windows-3.1-Latin-1") || 70 } else if (!g_ascii_strcasecmp(extracted_encoding, "ISO-8859-1-Windows-3.1-Latin-1") || !g_ascii_strcasecmp(extracted_encoding, "us-ascii")) {
73 !g_ascii_strcasecmp(encoding, "us-ascii")) 71 glib_encoding = "Windows-1252";
74 { 72 } else if (!g_ascii_strcasecmp(extracted_encoding, "unicode-2-0")) {
75 utf8 = g_convert(text, textlen, "UTF-8", "Windows-1252", NULL, NULL, NULL); 73 glib_encoding = "UTF-16BE";
76 } else if (!g_ascii_strcasecmp(encoding, "unicode-2-0")) { 74 } else if (g_ascii_strcasecmp(extracted_encoding, "utf-8")) {
77 /* Some official ICQ clients are apparently total crack, 75 purple_debug_warning("oscar", "Unrecognized character encoding \"%s\", attempting to convert to UTF-8 anyway\n", extracted_encoding);
78 * and have been known to save a UTF-8 string converted 76 glib_encoding = extracted_encoding;
79 * from the locale character set to UTF-16 (not from UTF-8 77 }
80 * to UTF-16!) in the away message. This hack should find 78
81 * and do something (un)reasonable with that, and not 79 if (glib_encoding != NULL) {
82 * mess up too much else. */ 80 utf8 = g_convert(text, textlen, "UTF-8", glib_encoding, NULL, NULL, NULL);
83 const gchar *charset = purple_account_get_string(account, "encoding", NULL);
84 if (charset) {
85 gsize len;
86 utf8 = g_convert(text, textlen, charset, "UTF-16BE", &len, NULL, NULL);
87 if (!utf8 || len != textlen || !g_utf8_validate(utf8, -1, NULL)) {
88 g_free(utf8);
89 utf8 = NULL;
90 } else {
91 purple_debug_info("oscar", "Used broken ICQ fallback encoding\n");
92 }
93 }
94 if (!utf8)
95 utf8 = g_convert(text, textlen, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
96 } else if (g_ascii_strcasecmp(encoding, "utf-8")) {
97 purple_debug_warning("oscar", "Unrecognized character encoding \"%s\", "
98 "attempting to convert to UTF-8 anyway\n", encoding);
99 utf8 = g_convert(text, textlen, "UTF-8", encoding, NULL, NULL, NULL);
100 } 81 }
101 82
102 /* 83 /*
103 * If utf8 is still NULL then either the encoding is utf-8 or 84 * If utf8 is still NULL then either the encoding is utf-8 or
104 * we have been unable to convert the text to utf-8 from the encoding 85 * we have been unable to convert the text to utf-8 from the encoding
105 * that was specified. So we check if the text is valid utf-8 then 86 * that was specified. So we check if the text is valid utf-8 then
106 * just copy it. 87 * just copy it.
107 */ 88 */
108 if (utf8 == NULL) { 89 if (utf8 == NULL) {
109 if (textlen != 0 && *text != '\0' 90 if (textlen != 0 && *text != '\0' && !g_utf8_validate(text, textlen, NULL))
110 && !g_utf8_validate(text, textlen, NULL))
111 utf8 = g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)")); 91 utf8 = g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)"));
112 else 92 else
113 utf8 = g_strndup(text, textlen); 93 utf8 = g_strndup(text, textlen);
114 } 94 }
115 95
96 g_free(extracted_encoding);
116 return utf8; 97 return utf8;
117 } 98 }
118 99
119 gchar * 100 gchar *
120 oscar_utf8_try_convert(PurpleAccount *account, OscarData *od, const gchar *msg) 101 oscar_utf8_try_convert(PurpleAccount *account, OscarData *od, const gchar *msg)