comparison libpurple/protocols/oscar/oscar.c @ 27860:28a7992d37b2

propagate from branch 'im.pidgin.pidgin' (head 3008dd1f70a9e9ddc2bbe30c0fae7aba334836fe) to branch 'im.pidgin.pidgin.yaz' (head 6a95d36de1e6e9696d508a822e4d08106ac42092)
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Wed, 02 Jul 2008 16:07:07 +0000
parents c6b29adc22d1 a5bb767b8644
children 382e7565e628
comparison
equal deleted inserted replaced
23410:f37b3e57fb92 27860:28a7992d37b2
302 oscar_encoding_to_utf8(PurpleAccount *account, const char *encoding, const char *text, int textlen) 302 oscar_encoding_to_utf8(PurpleAccount *account, const char *encoding, const char *text, int textlen)
303 { 303 {
304 gchar *utf8 = NULL; 304 gchar *utf8 = NULL;
305 305
306 if ((encoding == NULL) || encoding[0] == '\0') { 306 if ((encoding == NULL) || encoding[0] == '\0') {
307 purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n"); 307 purple_debug_info("yaz oscar", "Empty encoding, validate as UTF-8\n");
308 if(g_utf8_validate(text, textlen, NULL)){
309 gsize newlen;
310 utf8 = sanitize_utf(text, textlen, &newlen);
311 goto done;
312 }
313 // not UTF-8
314 purple_debug_info("yaz oscar", "Empty encoding, assuming UTF-16BE\n");
315 sanitize_ucs((gchar *)text, textlen);
316 utf8 = g_convert(text, textlen, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
317 if(utf8){
318 if(!g_utf8_validate(utf8, strlen(utf8), NULL)){
319 purple_debug_info("yaz oscar", "Invalid conversion\n");
320 g_free(utf8);
321 utf8 = NULL;
322 }
323 } else {
324 purple_debug_info("yaz oscar", "Conversion failed\n");
325 }
308 } else if (!g_ascii_strcasecmp(encoding, "iso-8859-1")) { 326 } else if (!g_ascii_strcasecmp(encoding, "iso-8859-1")) {
309 utf8 = g_convert(text, textlen, "UTF-8", "iso-8859-1", NULL, NULL, NULL); 327 utf8 = g_convert(text, textlen, "UTF-8", "iso-8859-1", NULL, NULL, NULL);
310 } else if (!g_ascii_strcasecmp(encoding, "ISO-8859-1-Windows-3.1-Latin-1") || 328 } else if (!g_ascii_strcasecmp(encoding, "ISO-8859-1-Windows-3.1-Latin-1") ||
311 !g_ascii_strcasecmp(encoding, "us-ascii")) 329 !g_ascii_strcasecmp(encoding, "us-ascii"))
312 { 330 {
317 * from the locale character set to UTF-16 (not from UTF-8 335 * from the locale character set to UTF-16 (not from UTF-8
318 * to UTF-16!) in the away message. This hack should find 336 * to UTF-16!) in the away message. This hack should find
319 * and do something (un)reasonable with that, and not 337 * and do something (un)reasonable with that, and not
320 * mess up too much else. */ 338 * mess up too much else. */
321 const gchar *charset = purple_account_get_string(account, "encoding", NULL); 339 const gchar *charset = purple_account_get_string(account, "encoding", NULL);
340 sanitize_ucs((gchar *)text, textlen);
322 if (charset) { 341 if (charset) {
323 gsize len; 342 gsize len;
324 utf8 = g_convert(text, textlen, charset, "UTF-16BE", &len, NULL, NULL); 343 utf8 = g_convert(text, textlen, charset, "UTF-16BE", &len, NULL, NULL);
325 if (!utf8 || len != textlen || !g_utf8_validate(utf8, -1, NULL)) { 344 if (!utf8 || len != textlen || !g_utf8_validate(utf8, -1, NULL)) {
326 g_free(utf8); 345 g_free(utf8);
348 && !g_utf8_validate(text, textlen, NULL)) 367 && !g_utf8_validate(text, textlen, NULL))
349 utf8 = g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)")); 368 utf8 = g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)"));
350 else 369 else
351 utf8 = g_strndup(text, textlen); 370 utf8 = g_strndup(text, textlen);
352 } 371 }
353 372 done:
354 return utf8; 373 return utf8;
355 } 374 }
356 375
357 static gchar * 376 static gchar *
358 oscar_utf8_try_convert(PurpleAccount *account, const gchar *msg) 377 oscar_utf8_try_convert(PurpleAccount *account, const gchar *msg)
373 } 392 }
374 393
375 static gchar * 394 static gchar *
376 purple_plugin_oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback) 395 purple_plugin_oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback)
377 { 396 {
378 gchar *ret = NULL; 397 gchar *ret = NULL, *ret2 = NULL;
379 GError *err = NULL; 398 GError *err = NULL;
380 399
381 if ((charsetstr == NULL) || (*charsetstr == '\0')) 400 if ((charsetstr == NULL) || (*charsetstr == '\0'))
382 return NULL; 401 return NULL;
383 402
396 ret = g_strndup(data, datalen); 415 ret = g_strndup(data, datalen);
397 else 416 else
398 purple_debug_warning("oscar", "String is not valid UTF-8.\n"); 417 purple_debug_warning("oscar", "String is not valid UTF-8.\n");
399 } 418 }
400 419
401 return ret; 420 ret2 = sanitize_utf(ret, -1, NULL);
421 g_free(ret);
422 return ret2;
402 } 423 }
403 424
404 /** 425 /**
405 * This attempts to decode an incoming IM into a UTF8 string. 426 * This attempts to decode an incoming IM into a UTF8 string.
406 * 427 *
432 charsetstr1 = "ISO-8859-1"; 453 charsetstr1 = "ISO-8859-1";
433 charsetstr2 = "UTF-8"; 454 charsetstr2 = "UTF-8";
434 } else if (charset == AIM_CHARSET_ASCII) { 455 } else if (charset == AIM_CHARSET_ASCII) {
435 /* Should just be "ASCII" */ 456 /* Should just be "ASCII" */
436 charsetstr1 = "ASCII"; 457 charsetstr1 = "ASCII";
437 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); 458 charsetstr2 = "UTF-8";
438 } else if (charset == 0x000d) { 459 } else if (charset == AIM_CHARSET_QUIRKUTF8) {
439 /* Mobile AIM client on a Nokia 3100 and an LG VX6000 */ 460 /* Mobile AIM client on a Nokia 3100 and an LG VX6000 */
440 charsetstr1 = "ISO-8859-1"; 461 charsetstr1 = "UTF-8"; //iChat use 0x000d when it sends UTF-8. --yaz
441 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); 462 charsetstr2 = "ISO-8859-1";
442 } else { 463 } else {
443 /* Unknown, hope for valid UTF-8... */ 464 /* Unknown, hope for valid UTF-8... */
444 charsetstr1 = "UTF-8"; 465 charsetstr1 = "UTF-8";
445 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); 466 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
446 } 467 }
504 { 525 {
505 PurpleBuddy *b; 526 PurpleBuddy *b;
506 b = purple_find_buddy(account, destsn); 527 b = purple_find_buddy(account, destsn);
507 if ((b != NULL) && (PURPLE_BUDDY_IS_ONLINE(b))) 528 if ((b != NULL) && (PURPLE_BUDDY_IS_ONLINE(b)))
508 { 529 {
509 *msg = g_convert(from, -1, "UTF-16BE", "UTF-8", NULL, &msglen, NULL); 530 *msg = g_convert(from, strlen(from), "UTF-16BE", "UTF-8", NULL, &msglen, NULL);
531 botch_ucs(*msg, msglen);
510 if (*msg != NULL) 532 if (*msg != NULL)
511 { 533 {
512 *charset = AIM_CHARSET_UNICODE; 534 *charset = AIM_CHARSET_UNICODE;
513 *charsubset = 0x0000; 535 *charsubset = 0x0000;
514 *msglen_int = msglen; 536 *msglen_int = msglen;
527 549
528 /* 550 /*
529 * XXX - We need a way to only attempt to convert if we KNOW "from" 551 * XXX - We need a way to only attempt to convert if we KNOW "from"
530 * can be converted to "charsetstr" 552 * can be converted to "charsetstr"
531 */ 553 */
554 #ifndef _WIN32
555 /* nosuke reported that this portion caused unexpected
556 * conversion from utf-8 fullwidth tilde/numbers/alphabets to
557 * halfwidth ones on windows environment. --yaz */
532 *msg = g_convert(from, -1, charsetstr, "UTF-8", NULL, &msglen, NULL); 558 *msg = g_convert(from, -1, charsetstr, "UTF-8", NULL, &msglen, NULL);
533 if (*msg != NULL) { 559 if (*msg != NULL) {
534 *charset = AIM_CHARSET_CUSTOM; 560 *charset = AIM_CHARSET_CUSTOM;
535 *charsubset = 0x0000; 561 *charsubset = 0x0000;
536 *msglen_int = msglen; 562 *msglen_int = msglen;
537 return; 563 return;
538 } 564 }
565 #endif
539 566
540 /* 567 /*
541 * Nothing else worked, so send as UTF-16BE. 568 * Nothing else worked, so send as UTF-16BE.
542 */ 569 */
543 *msg = g_convert(from, -1, "UTF-16BE", "UTF-8", NULL, &msglen, &err); 570 *msg = g_convert(from, strlen(from), "UTF-16BE", "UTF-8", NULL, &msglen, &err);
571 botch_ucs(*msg, msglen);
544 if (*msg != NULL) { 572 if (*msg != NULL) {
545 *charset = AIM_CHARSET_UNICODE; 573 *charset = AIM_CHARSET_UNICODE;
546 *charsubset = 0x0000; 574 *charsubset = 0x0000;
547 *msglen_int = msglen; 575 *msglen_int = msglen;
548 return; 576 return;
2166 curpart = args->mpmsg.parts; 2194 curpart = args->mpmsg.parts;
2167 while (curpart != NULL) { 2195 while (curpart != NULL) {
2168 tmp = purple_plugin_oscar_decode_im_part(account, userinfo->sn, curpart->charset, 2196 tmp = purple_plugin_oscar_decode_im_part(account, userinfo->sn, curpart->charset,
2169 curpart->charsubset, curpart->data, curpart->datalen); 2197 curpart->charsubset, curpart->data, curpart->datalen);
2170 if (tmp != NULL) { 2198 if (tmp != NULL) {
2199 purple_str_strip_char(tmp, 0x0d); // yaz: strip CR
2200 // purple_debug_info("yaz oscar", "tmp=%s",tmp);
2171 g_string_append(message, tmp); 2201 g_string_append(message, tmp);
2172 g_free(tmp); 2202 g_free(tmp);
2173 } 2203 }
2174 2204
2175 curpart = curpart->next; 2205 curpart = curpart->next;
2268 if (args->type & OSCAR_CAPABILITY_CHAT) 2298 if (args->type & OSCAR_CAPABILITY_CHAT)
2269 { 2299 {
2270 char *encoding, *utf8name, *tmp; 2300 char *encoding, *utf8name, *tmp;
2271 GHashTable *components; 2301 GHashTable *components;
2272 2302
2303 // purple_debug_info("yaz oscar", "chat request %s\n", args->msg);
2273 if (!args->info.chat.roominfo.name || !args->info.chat.roominfo.exchange) { 2304 if (!args->info.chat.roominfo.name || !args->info.chat.roominfo.exchange) {
2274 g_free(message); 2305 g_free(message);
2275 return 1; 2306 return 1;
2276 } 2307 }
2277 encoding = args->encoding ? oscar_encoding_extract(args->encoding) : NULL; 2308 encoding = args->encoding ? oscar_encoding_extract(args->encoding) : NULL;
2290 components = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, 2321 components = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
2291 g_free); 2322 g_free);
2292 g_hash_table_replace(components, g_strdup("room"), utf8name); 2323 g_hash_table_replace(components, g_strdup("room"), utf8name);
2293 g_hash_table_replace(components, g_strdup("exchange"), 2324 g_hash_table_replace(components, g_strdup("exchange"),
2294 g_strdup_printf("%d", args->info.chat.roominfo.exchange)); 2325 g_strdup_printf("%d", args->info.chat.roominfo.exchange));
2326 purple_debug_info("yaz oscar", "about to call serv_got_chat_invite\n");
2327 // purple_debug_info("yaz oscar", "name=%s message=%s\n", name ? name : args->info.chat.roominfo.name, message);
2295 serv_got_chat_invite(gc, 2328 serv_got_chat_invite(gc,
2296 utf8name, 2329 utf8name,
2297 userinfo->sn, 2330 userinfo->sn,
2298 message, 2331 message,
2299 components); 2332 components);
2511 * character in whatever encoding the message was sent in. Type 1 2544 * character in whatever encoding the message was sent in. Type 1
2512 * messages are always made up of only one part, so we can easily account 2545 * messages are always made up of only one part, so we can easily account
2513 * for this suck-ass part of the protocol by splitting the string into at 2546 * for this suck-ass part of the protocol by splitting the string into at
2514 * most 1 baby string. 2547 * most 1 baby string.
2515 */ 2548 */
2516 msg1 = g_strsplit(args->msg, "\376", (args->type == 0x01 ? 1 : 0)); 2549 msg1 = g_strsplit(args->msg, "\376", (args->type == 0x01 ? 1 : 0)); // \376 is 0xfe
2517 for (numtoks=0; msg1[numtoks]; numtoks++); 2550 for (numtoks=0; msg1[numtoks]; numtoks++);
2518 msg2 = (gchar **)g_malloc((numtoks+1)*sizeof(gchar *)); 2551 msg2 = (gchar **)g_malloc((numtoks+1)*sizeof(gchar *));
2519 for (i=0; msg1[i]; i++) { 2552 for (i=0; msg1[i]; i++) {
2520 gchar *uin = g_strdup_printf("%u", args->uin); 2553 gchar *uin = g_strdup_printf("%u", args->uin);
2521 2554
4188 static void 4221 static void
4189 purple_odc_send_im(PeerConnection *conn, const char *message, PurpleMessageFlags imflags) 4222 purple_odc_send_im(PeerConnection *conn, const char *message, PurpleMessageFlags imflags)
4190 { 4223 {
4191 GString *msg; 4224 GString *msg;
4192 GString *data; 4225 GString *data;
4193 gchar *tmp; 4226 guint16 charset;
4194 int tmplen;
4195 guint16 charset, charsubset;
4196 GData *attribs; 4227 GData *attribs;
4197 const char *start, *end, *last; 4228 const char *start, *end, *last;
4198 int oscar_id = 0; 4229 int oscar_id = 0;
4199 4230
4200 msg = g_string_new("<HTML><BODY>"); 4231 msg = g_string_new("<HTML><BODY>");
4250 if (last && *last) 4281 if (last && *last)
4251 g_string_append(msg, last); 4282 g_string_append(msg, last);
4252 4283
4253 g_string_append(msg, "</BODY></HTML>"); 4284 g_string_append(msg, "</BODY></HTML>");
4254 4285
4255 /* Convert the message to a good encoding */ 4286 /* iChat and AIM6 use 0x000d to send UTF8. moreover, AIM6 persists only to UTF8! --yaz */
4256 purple_plugin_oscar_convert_to_best_encoding(conn->od->gc, 4287 charset = AIM_CHARSET_QUIRKUTF8;
4257 conn->sn, msg->str, &tmp, &tmplen, &charset, &charsubset);
4258 g_string_free(msg, TRUE);
4259 msg = g_string_new_len(tmp, tmplen);
4260 g_free(tmp);
4261 4288
4262 /* Append any binary data that we may have */ 4289 /* Append any binary data that we may have */
4263 if (oscar_id) { 4290 if (oscar_id) {
4264 msg = g_string_append_len(msg, data->str, data->len); 4291 msg = g_string_append_len(msg, data->str, data->len);
4265 msg = g_string_append(msg, "</BINARY>"); 4292 msg = g_string_append(msg, "</BINARY>");
4497 int charset = 0; 4524 int charset = 0;
4498 char *encoded = NULL; 4525 char *encoded = NULL;
4499 4526
4500 charset = oscar_charset_check(str); 4527 charset = oscar_charset_check(str);
4501 if (charset == AIM_CHARSET_UNICODE) { 4528 if (charset == AIM_CHARSET_UNICODE) {
4502 encoded = g_convert(str, -1, "UTF-16BE", "UTF-8", NULL, ret_len, NULL); 4529 encoded = g_convert(str, strlen(str), "UTF-16BE", "UTF-8", NULL, ret_len, NULL);
4530 botch_ucs(encoded, *ret_len);
4503 *encoding = "unicode-2-0"; 4531 *encoding = "unicode-2-0";
4504 } else if (charset == AIM_CHARSET_CUSTOM) { 4532 } else if (charset == AIM_CHARSET_CUSTOM) {
4505 encoded = g_convert(str, -1, "ISO-8859-1", "UTF-8", NULL, ret_len, NULL); 4533 encoded = g_convert(str, -1, "ISO-8859-1", "UTF-8", NULL, ret_len, NULL);
4506 *encoding = "iso-8859-1"; 4534 *encoding = "iso-8859-1";
4507 } else { 4535 } else {
5628 charsetstr = "us-ascii"; 5656 charsetstr = "us-ascii";
5629 else if (charset == AIM_CHARSET_UNICODE) 5657 else if (charset == AIM_CHARSET_UNICODE)
5630 charsetstr = "unicode-2-0"; 5658 charsetstr = "unicode-2-0";
5631 else if (charset == AIM_CHARSET_CUSTOM) 5659 else if (charset == AIM_CHARSET_CUSTOM)
5632 charsetstr = "iso-8859-1"; 5660 charsetstr = "iso-8859-1";
5633 aim_chat_send_im(od, c->conn, 0, buf2, len, charsetstr, "en"); 5661 aim_chat_send_im(od, c->conn, 0, buf2, len, charsetstr, "JA");
5634 g_free(buf2); 5662 g_free(buf2);
5635 g_free(buf); 5663 g_free(buf);
5636 5664
5637 return 0; 5665 return 0;
5638 } 5666 }