annotate libpurple/protocols/oscar/encoding.c @ 32774:069919a0896c

Validate utf8 for a few random strings that we read, in case AOL or ICQ decide to start sending us non-utf8
author Mark Doliner <mark@kingant.net>
date Thu, 03 May 2012 09:28:11 +0000
parents 52801bade70e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
1 /*
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
2 * Purple's oscar protocol plugin
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
3 * This file is the legal property of its developers.
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
4 * Please see the AUTHORS file distributed alongside this file.
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
5 *
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
6 * This library is free software; you can redistribute it and/or
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
8 * License as published by the Free Software Foundation; either
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
9 * version 2 of the License, or (at your option) any later version.
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
10 *
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
11 * This library is distributed in the hope that it will be useful,
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
14 * Lesser General Public License for more details.
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
15 *
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
17 * License along with this library; if not, write to the Free Software
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
19 */
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
20
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
21 #include "encoding.h"
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
22
30386
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
23 static gchar *
31417
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
24 encoding_multi_convert_to_utf8(const gchar *text, gssize textlen, const gchar *encodings, GError **error, gboolean fallback)
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
25 {
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
26 gchar *utf8 = NULL;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
27 const gchar *begin = encodings;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
28 const gchar *end = NULL;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
29 gchar *curr_encoding = NULL; /* allocated buffer for encoding name */
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
30 const gchar *curr_encoding_ro = NULL; /* read-only encoding name */
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
31
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
32 if (!encodings) {
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
33 purple_debug_error("oscar", "encodings is NULL");
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
34 return NULL;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
35 }
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
36
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
37 for (;;)
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
38 {
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
39 /* extract next encoding */
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
40 end = strchr(begin, ',');
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
41 if (!end) {
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
42 curr_encoding_ro = begin;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
43 } else { /* allocate buffer for encoding */
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
44 curr_encoding = g_strndup(begin, end - begin);
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
45 if (!curr_encoding) {
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
46 purple_debug_error("oscar", "Error allocating memory for encoding");
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
47 break;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
48 }
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
49 curr_encoding_ro = curr_encoding;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
50 }
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
51
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
52 if (!g_ascii_strcasecmp(curr_encoding_ro, "utf-8") && g_utf8_validate(text, textlen, NULL)) {
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
53 break;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
54 }
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
55
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
56 utf8 = g_convert(text, textlen, "UTF-8", curr_encoding_ro, NULL, NULL, NULL);
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
57
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
58 if (!end) /* last occurence. do not free curr_encoding: buffer was'nt allocated */
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
59 break;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
60
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
61 g_free(curr_encoding); /* free allocated buffer for encoding here */
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
62
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
63 if (utf8) /* text was successfully converted */
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
64 break;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
65
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
66 begin = end + 1;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
67 }
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
68
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
69 if (!utf8 && fallback)
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
70 { /* "begin" points to last encoding */
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
71 utf8 = g_convert_with_fallback(text, textlen, "UTF-8", begin, "?", NULL, NULL, error);
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
72 }
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
73
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
74 return utf8;
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
75 }
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
76
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
77 static gchar *
30386
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
78 encoding_extract(const char *encoding)
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
79 {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
80 char *begin, *end;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
81
30392
a4f579485ce6 encoding can be NULL in encoding_extract(); this is not an error and
ivan.komarov@soc.pidgin.im
parents: 30391
diff changeset
82 if (encoding == NULL) {
a4f579485ce6 encoding can be NULL in encoding_extract(); this is not an error and
ivan.komarov@soc.pidgin.im
parents: 30391
diff changeset
83 return NULL;
a4f579485ce6 encoding can be NULL in encoding_extract(); this is not an error and
ivan.komarov@soc.pidgin.im
parents: 30391
diff changeset
84 }
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
85
30386
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
86 if (!g_str_has_prefix(encoding, "text/aolrtf; charset=") &&
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
87 !g_str_has_prefix(encoding, "text/x-aolrtf; charset=") &&
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
88 !g_str_has_prefix(encoding, "text/plain; charset=")) {
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
89 return g_strdup(encoding);
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
90 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
91
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
92 begin = strchr(encoding, '"');
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
93 end = strrchr(encoding, '"');
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
94
30386
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
95 if ((begin == NULL) || (end == NULL) || (begin >= end)) {
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
96 return g_strdup(encoding);
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
97 }
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
98
30386
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
99 return g_strndup(begin+1, (end-1) - begin);
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
100 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
101
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
102 gchar *
30386
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
103 oscar_encoding_to_utf8(const char *encoding, const char *text, int textlen)
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
104 {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
105 gchar *utf8 = NULL;
30386
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
106 const gchar *glib_encoding = NULL;
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
107 gchar *extracted_encoding = encoding_extract(encoding);
31086
a8cc50c2279f Remove trailing whitespace
Richard Laager <rlaager@wiktel.com>
parents: 30894
diff changeset
108
30386
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
109 if (extracted_encoding == NULL || *extracted_encoding == '\0') {
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
110 purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n");
30386
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
111 } else if (!g_ascii_strcasecmp(extracted_encoding, "iso-8859-1")) {
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
112 glib_encoding = "iso-8859-1";
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
113 } else if (!g_ascii_strcasecmp(extracted_encoding, "ISO-8859-1-Windows-3.1-Latin-1") || !g_ascii_strcasecmp(extracted_encoding, "us-ascii")) {
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
114 glib_encoding = "Windows-1252";
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
115 } else if (!g_ascii_strcasecmp(extracted_encoding, "unicode-2-0")) {
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
116 glib_encoding = "UTF-16BE";
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
117 } else if (g_ascii_strcasecmp(extracted_encoding, "utf-8")) {
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
118 glib_encoding = extracted_encoding;
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
119 }
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
120
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
121 if (glib_encoding != NULL) {
31417
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
122 utf8 = encoding_multi_convert_to_utf8(text, textlen, glib_encoding, NULL, FALSE);
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
123 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
124
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
125 /*
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
126 * If utf8 is still NULL then either the encoding is utf-8 or
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
127 * we have been unable to convert the text to utf-8 from the encoding
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
128 * that was specified. So we check if the text is valid utf-8 then
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
129 * just copy it.
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
130 */
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
131 if (utf8 == NULL) {
30386
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
132 if (textlen != 0 && *text != '\0' && !g_utf8_validate(text, textlen, NULL))
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
133 utf8 = g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)"));
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
134 else
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
135 utf8 = g_strndup(text, textlen);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
136 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
137
30386
ca90b6c27eb8 Refactored oscar_encoding_to_utf8().
ivan.komarov@soc.pidgin.im
parents: 30385
diff changeset
138 g_free(extracted_encoding);
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
139 return utf8;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
140 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
141
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
142 gchar *
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
143 oscar_utf8_try_convert(PurpleAccount *account, OscarData *od, const gchar *msg)
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
144 {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
145 const char *charset = NULL;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
146 char *ret = NULL;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
147
30401
a6511abec788 A couple of refactorings related to oscar_utf8_try_convert().
ivan.komarov@soc.pidgin.im
parents: 30397
diff changeset
148 if (msg == NULL)
a6511abec788 A couple of refactorings related to oscar_utf8_try_convert().
ivan.komarov@soc.pidgin.im
parents: 30397
diff changeset
149 return NULL;
a6511abec788 A couple of refactorings related to oscar_utf8_try_convert().
ivan.komarov@soc.pidgin.im
parents: 30397
diff changeset
150
a6511abec788 A couple of refactorings related to oscar_utf8_try_convert().
ivan.komarov@soc.pidgin.im
parents: 30397
diff changeset
151 if (g_utf8_validate(msg, -1, NULL))
a6511abec788 A couple of refactorings related to oscar_utf8_try_convert().
ivan.komarov@soc.pidgin.im
parents: 30397
diff changeset
152 return g_strdup(msg);
a6511abec788 A couple of refactorings related to oscar_utf8_try_convert().
ivan.komarov@soc.pidgin.im
parents: 30397
diff changeset
153
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
154 if (od->icq)
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
155 charset = purple_account_get_string(account, "encoding", NULL);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
156
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
157 if(charset && *charset)
31417
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
158 ret = encoding_multi_convert_to_utf8(msg, -1, charset, NULL, FALSE);
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
159
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
160 if(!ret)
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
161 ret = purple_utf8_try_convert(msg);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
162
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
163 return ret;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
164 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
165
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
166 static gchar *
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
167 oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback)
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
168 {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
169 gchar *ret = NULL;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
170 GError *err = NULL;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
171
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
172 if ((charsetstr == NULL) || (*charsetstr == '\0'))
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
173 return NULL;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
174
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
175 if (g_ascii_strcasecmp("UTF-8", charsetstr)) {
31417
52801bade70e Currently oscar (ICQ) protocol does not support comma-separated list of
loentar@google.com
parents: 31086
diff changeset
176 ret = encoding_multi_convert_to_utf8(data, datalen, charsetstr, &err, fallback);
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
177 if (err != NULL) {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
178 purple_debug_warning("oscar", "Conversion from %s failed: %s.\n",
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
179 charsetstr, err->message);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
180 g_error_free(err);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
181 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
182 } else {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
183 if (g_utf8_validate(data, datalen, NULL))
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
184 ret = g_strndup(data, datalen);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
185 else
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
186 purple_debug_warning("oscar", "String is not valid UTF-8.\n");
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
187 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
188
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
189 return ret;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
190 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
191
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
192 gchar *
30391
5661f30d1b8e Got rid of receiving multipart messages over channel 1, which simplified the code
ivan.komarov@soc.pidgin.im
parents: 30386
diff changeset
193 oscar_decode_im(PurpleAccount *account, const char *sourcebn, guint16 charset, const gchar *data, gsize datalen)
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
194 {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
195 gchar *ret = NULL;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
196 /* charsetstr1 is always set to what the correct encoding should be. */
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
197 const gchar *charsetstr1, *charsetstr2, *charsetstr3 = NULL;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
198
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
199 if ((datalen == 0) || (data == NULL))
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
200 return NULL;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
201
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
202 if (charset == AIM_CHARSET_UNICODE) {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
203 charsetstr1 = "UTF-16BE";
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
204 charsetstr2 = "UTF-8";
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
205 } else if (charset == AIM_CHARSET_LATIN_1) {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
206 if ((sourcebn != NULL) && oscar_util_valid_name_icq(sourcebn))
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
207 charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
208 else
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
209 charsetstr1 = "ISO-8859-1";
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
210 charsetstr2 = "UTF-8";
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
211 } else if (charset == AIM_CHARSET_ASCII) {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
212 /* Should just be "ASCII" */
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
213 charsetstr1 = "ASCII";
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
214 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
215 } else if (charset == 0x000d) {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
216 /* iChat sending unicode over a Direct IM connection = UTF-8 */
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
217 /* Mobile AIM client on multiple devices (including Blackberry Tour, Nokia 3100, and LG VX6000) = ISO-8859-1 */
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
218 charsetstr1 = "UTF-8";
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
219 charsetstr2 = "ISO-8859-1";
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
220 charsetstr3 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
221 } else {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
222 /* Unknown, hope for valid UTF-8... */
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
223 charsetstr1 = "UTF-8";
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
224 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
225 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
226
30391
5661f30d1b8e Got rid of receiving multipart messages over channel 1, which simplified the code
ivan.komarov@soc.pidgin.im
parents: 30386
diff changeset
227 purple_debug_info("oscar", "Parsing IM, charset=0x%04hx, datalen=%" G_GSIZE_FORMAT ", choice1=%s, choice2=%s, choice3=%s\n",
5661f30d1b8e Got rid of receiving multipart messages over channel 1, which simplified the code
ivan.komarov@soc.pidgin.im
parents: 30386
diff changeset
228 charset, datalen, charsetstr1, charsetstr2, (charsetstr3 ? charsetstr3 : ""));
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
229
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
230 ret = oscar_convert_to_utf8(data, datalen, charsetstr1, FALSE);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
231 if (ret == NULL) {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
232 if (charsetstr3 != NULL) {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
233 /* Try charsetstr2 without allowing substitutions, then fall through to charsetstr3 if needed */
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
234 ret = oscar_convert_to_utf8(data, datalen, charsetstr2, FALSE);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
235 if (ret == NULL)
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
236 ret = oscar_convert_to_utf8(data, datalen, charsetstr3, TRUE);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
237 } else {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
238 /* Try charsetstr2, allowing substitutions */
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
239 ret = oscar_convert_to_utf8(data, datalen, charsetstr2, TRUE);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
240 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
241 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
242 if (ret == NULL) {
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
243 char *str, *salvage, *tmp;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
244
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
245 str = g_malloc(datalen + 1);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
246 strncpy(str, data, datalen);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
247 str[datalen] = '\0';
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
248 salvage = purple_utf8_salvage(str);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
249 tmp = g_strdup_printf(_("(There was an error receiving this message. Either you and %s have different encodings selected, or %s has a buggy client.)"),
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
250 sourcebn, sourcebn);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
251 ret = g_strdup_printf("%s %s", salvage, tmp);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
252 g_free(tmp);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
253 g_free(str);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
254 g_free(salvage);
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
255 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
256
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
257 return ret;
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
258 }
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
259
30394
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
260 static guint16
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
261 get_simplest_charset(const char *utf8)
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
262 {
30397
1f3ef11a9690 My esteemed mentor caught me red-handed. Thanks Mark!
ivan.komarov@soc.pidgin.im
parents: 30394
diff changeset
263 while (*utf8)
30394
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
264 {
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
265 if ((unsigned char)(*utf8) > 0x7f) {
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
266 /* not ASCII! */
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
267 return AIM_CHARSET_UNICODE;
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
268 }
30397
1f3ef11a9690 My esteemed mentor caught me red-handed. Thanks Mark!
ivan.komarov@soc.pidgin.im
parents: 30394
diff changeset
269 utf8++;
30394
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
270 }
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
271 return AIM_CHARSET_ASCII;
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
272 }
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
273
30385
9d386bf63eab Stop using custom encodings (and LATIN-1, for that matter) for sending
ivan.komarov@soc.pidgin.im
parents: 30383
diff changeset
274 gchar *
30394
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
275 oscar_encode_im(const gchar *msg, gsize *result_len, guint16 *charset, gchar **charsetstr)
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
276 {
30394
ed520e6e972b Renames and cleanups.
ivan.komarov@soc.pidgin.im
parents: 30392
diff changeset
277 guint16 msg_charset = get_simplest_charset(msg);
30385
9d386bf63eab Stop using custom encodings (and LATIN-1, for that matter) for sending
ivan.komarov@soc.pidgin.im
parents: 30383
diff changeset
278 if (charset != NULL) {
9d386bf63eab Stop using custom encodings (and LATIN-1, for that matter) for sending
ivan.komarov@soc.pidgin.im
parents: 30383
diff changeset
279 *charset = msg_charset;
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
280 }
30385
9d386bf63eab Stop using custom encodings (and LATIN-1, for that matter) for sending
ivan.komarov@soc.pidgin.im
parents: 30383
diff changeset
281 if (charsetstr != NULL) {
9d386bf63eab Stop using custom encodings (and LATIN-1, for that matter) for sending
ivan.komarov@soc.pidgin.im
parents: 30383
diff changeset
282 *charsetstr = msg_charset == AIM_CHARSET_ASCII ? "us-ascii" : "unicode-2-0";
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
283 }
30385
9d386bf63eab Stop using custom encodings (and LATIN-1, for that matter) for sending
ivan.komarov@soc.pidgin.im
parents: 30383
diff changeset
284 return g_convert(msg, -1, msg_charset == AIM_CHARSET_ASCII ? "ASCII" : "UTF-16BE", "UTF-8", NULL, result_len, NULL);
30383
502f25fd81b8 Forgot to add encoding.c.
ivan.komarov@soc.pidgin.im
parents:
diff changeset
285 }