comparison libpurple/protocols/oscar/oscar.c @ 27940:b623c1824f29

propagate from branch 'im.pidgin.pidgin' (head 58fbcc161c5eadf3f307cd460a6ce0a209d908df) to branch 'im.pidgin.pidgin.yaz' (head a378a1d9618c47f5b0e6c67daf613d3c4275f7cf)
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Thu, 16 Apr 2009 05:51:42 +0000
parents b87843de7c6a 1688f7e15530
children 684690dbda4a
comparison
equal deleted inserted replaced
26650:3712d32363dd 27940:b623c1824f29
304 oscar_encoding_to_utf8(PurpleAccount *account, const char *encoding, const char *text, int textlen) 304 oscar_encoding_to_utf8(PurpleAccount *account, const char *encoding, const char *text, int textlen)
305 { 305 {
306 gchar *utf8 = NULL; 306 gchar *utf8 = NULL;
307 307
308 if ((encoding == NULL) || encoding[0] == '\0') { 308 if ((encoding == NULL) || encoding[0] == '\0') {
309 purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n"); 309 purple_debug_info("yaz oscar", "Empty encoding, validate as UTF-8\n");
310 if(g_utf8_validate(text, textlen, NULL)){
311 gsize newlen;
312 utf8 = sanitize_utf(text, textlen, &newlen);
313 goto done;
314 }
315 // not UTF-8
316 purple_debug_info("yaz oscar", "Empty encoding, assuming UTF-16BE\n");
317 sanitize_ucs((gchar *)text, textlen);
318 utf8 = g_convert(text, textlen, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
319 if(utf8){
320 if(!g_utf8_validate(utf8, strlen(utf8), NULL)){
321 purple_debug_info("yaz oscar", "Invalid conversion\n");
322 g_free(utf8);
323 utf8 = NULL;
324 }
325 } else {
326 purple_debug_info("yaz oscar", "Conversion failed\n");
327 }
310 } else if (!g_ascii_strcasecmp(encoding, "iso-8859-1")) { 328 } else if (!g_ascii_strcasecmp(encoding, "iso-8859-1")) {
311 utf8 = g_convert(text, textlen, "UTF-8", "iso-8859-1", NULL, NULL, NULL); 329 utf8 = g_convert(text, textlen, "UTF-8", "iso-8859-1", NULL, NULL, NULL);
312 } else if (!g_ascii_strcasecmp(encoding, "ISO-8859-1-Windows-3.1-Latin-1") || 330 } else if (!g_ascii_strcasecmp(encoding, "ISO-8859-1-Windows-3.1-Latin-1") ||
313 !g_ascii_strcasecmp(encoding, "us-ascii")) 331 !g_ascii_strcasecmp(encoding, "us-ascii"))
314 { 332 {
319 * from the locale character set to UTF-16 (not from UTF-8 337 * from the locale character set to UTF-16 (not from UTF-8
320 * to UTF-16!) in the away message. This hack should find 338 * to UTF-16!) in the away message. This hack should find
321 * and do something (un)reasonable with that, and not 339 * and do something (un)reasonable with that, and not
322 * mess up too much else. */ 340 * mess up too much else. */
323 const gchar *charset = purple_account_get_string(account, "encoding", NULL); 341 const gchar *charset = purple_account_get_string(account, "encoding", NULL);
342 sanitize_ucs((gchar *)text, textlen);
324 if (charset) { 343 if (charset) {
325 gsize len; 344 gsize len;
326 utf8 = g_convert(text, textlen, charset, "UTF-16BE", &len, NULL, NULL); 345 utf8 = g_convert(text, textlen, charset, "UTF-16BE", &len, NULL, NULL);
327 if (!utf8 || len != textlen || !g_utf8_validate(utf8, -1, NULL)) { 346 if (!utf8 || len != textlen || !g_utf8_validate(utf8, -1, NULL)) {
328 g_free(utf8); 347 g_free(utf8);
350 && !g_utf8_validate(text, textlen, NULL)) 369 && !g_utf8_validate(text, textlen, NULL))
351 utf8 = g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)")); 370 utf8 = g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)"));
352 else 371 else
353 utf8 = g_strndup(text, textlen); 372 utf8 = g_strndup(text, textlen);
354 } 373 }
355 374 done:
356 return utf8; 375 return utf8;
357 } 376 }
358 377
359 static gchar * 378 static gchar *
360 oscar_utf8_try_convert(PurpleAccount *account, const gchar *msg) 379 oscar_utf8_try_convert(PurpleAccount *account, const gchar *msg)
375 } 394 }
376 395
377 static gchar * 396 static gchar *
378 purple_plugin_oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback) 397 purple_plugin_oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback)
379 { 398 {
380 gchar *ret = NULL; 399 gchar *ret = NULL, *ret2 = NULL;
381 GError *err = NULL; 400 GError *err = NULL;
382 401
383 if ((charsetstr == NULL) || (*charsetstr == '\0')) 402 if ((charsetstr == NULL) || (*charsetstr == '\0'))
384 return NULL; 403 return NULL;
385 404
398 ret = g_strndup(data, datalen); 417 ret = g_strndup(data, datalen);
399 else 418 else
400 purple_debug_warning("oscar", "String is not valid UTF-8.\n"); 419 purple_debug_warning("oscar", "String is not valid UTF-8.\n");
401 } 420 }
402 421
403 return ret; 422 ret2 = sanitize_utf(ret, -1, NULL);
423 g_free(ret);
424 return ret2;
404 } 425 }
405 426
406 /** 427 /**
407 * This attemps to decode an incoming IM into a UTF8 string. 428 * This attemps to decode an incoming IM into a UTF8 string.
408 * 429 *
434 charsetstr1 = "ISO-8859-1"; 455 charsetstr1 = "ISO-8859-1";
435 charsetstr2 = "UTF-8"; 456 charsetstr2 = "UTF-8";
436 } else if (charset == AIM_CHARSET_ASCII) { 457 } else if (charset == AIM_CHARSET_ASCII) {
437 /* Should just be "ASCII" */ 458 /* Should just be "ASCII" */
438 charsetstr1 = "ASCII"; 459 charsetstr1 = "ASCII";
439 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); 460 charsetstr2 = "UTF-8";
440 } else if (charset == 0x000d) { 461 } else if (charset == AIM_CHARSET_QUIRKUTF8) {
441 /* Mobile AIM client on a Nokia 3100 and an LG VX6000 */ 462 /* Mobile AIM client on a Nokia 3100 and an LG VX6000 */
442 charsetstr1 = "ISO-8859-1"; 463 charsetstr1 = "UTF-8"; //iChat use 0x000d when it sends UTF-8. --yaz
443 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); 464 charsetstr2 = "ISO-8859-1";
444 } else { 465 } else {
445 /* Unknown, hope for valid UTF-8... */ 466 /* Unknown, hope for valid UTF-8... */
446 charsetstr1 = "UTF-8"; 467 charsetstr1 = "UTF-8";
447 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); 468 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
448 } 469 }
507 PurpleBuddy *b; 528 PurpleBuddy *b;
508 b = purple_find_buddy(account, destbn); 529 b = purple_find_buddy(account, destbn);
509 if ((b != NULL) && (PURPLE_BUDDY_IS_ONLINE(b))) 530 if ((b != NULL) && (PURPLE_BUDDY_IS_ONLINE(b)))
510 { 531 {
511 *msg = g_convert(from, -1, "UTF-16BE", "UTF-8", NULL, &msglen, &err); 532 *msg = g_convert(from, -1, "UTF-16BE", "UTF-8", NULL, &msglen, &err);
533 botch_ucs(*msg, msglen);
512 if (*msg != NULL) 534 if (*msg != NULL)
513 { 535 {
514 *charset = AIM_CHARSET_UNICODE; 536 *charset = AIM_CHARSET_UNICODE;
515 *charsubset = 0x0000; 537 *charsubset = 0x0000;
516 *msglen_int = msglen; 538 *msglen_int = msglen;
534 556
535 /* 557 /*
536 * XXX - We need a way to only attempt to convert if we KNOW "from" 558 * XXX - We need a way to only attempt to convert if we KNOW "from"
537 * can be converted to "charsetstr" 559 * can be converted to "charsetstr"
538 */ 560 */
561 #ifndef _WIN32
562 /* nosuke reported that this portion caused unexpected
563 * conversion from utf-8 fullwidth tilde/numbers/alphabets to
564 * halfwidth ones on windows environment. --yaz */
539 *msg = g_convert(from, -1, charsetstr, "UTF-8", NULL, &msglen, &err); 565 *msg = g_convert(from, -1, charsetstr, "UTF-8", NULL, &msglen, &err);
540 if (*msg != NULL) { 566 if (*msg != NULL) {
541 *charset = AIM_CHARSET_CUSTOM; 567 *charset = AIM_CHARSET_CUSTOM;
542 *charsubset = 0x0000; 568 *charsubset = 0x0000;
543 *msglen_int = msglen; 569 *msglen_int = msglen;
546 572
547 purple_debug_info("oscar", "Conversion from UTF-8 to %s failed (%s), falling back to unicode.\n", 573 purple_debug_info("oscar", "Conversion from UTF-8 to %s failed (%s), falling back to unicode.\n",
548 charsetstr, err->message); 574 charsetstr, err->message);
549 g_error_free(err); 575 g_error_free(err);
550 err = NULL; 576 err = NULL;
577 #endif
551 578
552 /* 579 /*
553 * Nothing else worked, so send as UTF-16BE. 580 * Nothing else worked, so send as UTF-16BE.
554 */ 581 */
555 *msg = g_convert(from, -1, "UTF-16BE", "UTF-8", NULL, &msglen, &err); 582 *msg = g_convert(from, strlen(from), "UTF-16BE", "UTF-8", NULL, &msglen, &err);
583 botch_ucs(*msg, msglen);
556 if (*msg != NULL) { 584 if (*msg != NULL) {
557 *charset = AIM_CHARSET_UNICODE; 585 *charset = AIM_CHARSET_UNICODE;
558 *charsubset = 0x0000; 586 *charsubset = 0x0000;
559 *msglen_int = msglen; 587 *msglen_int = msglen;
560 return; 588 return;
2318 curpart = args->mpmsg.parts; 2346 curpart = args->mpmsg.parts;
2319 while (curpart != NULL) { 2347 while (curpart != NULL) {
2320 tmp = purple_plugin_oscar_decode_im_part(account, userinfo->bn, curpart->charset, 2348 tmp = purple_plugin_oscar_decode_im_part(account, userinfo->bn, curpart->charset,
2321 curpart->charsubset, curpart->data, curpart->datalen); 2349 curpart->charsubset, curpart->data, curpart->datalen);
2322 if (tmp != NULL) { 2350 if (tmp != NULL) {
2351 purple_str_strip_char(tmp, 0x0d); // yaz: strip CR
2352 // purple_debug_info("yaz oscar", "tmp=%s",tmp);
2323 g_string_append(message, tmp); 2353 g_string_append(message, tmp);
2324 g_free(tmp); 2354 g_free(tmp);
2325 } 2355 }
2326 2356
2327 curpart = curpart->next; 2357 curpart = curpart->next;
2420 if (args->type & OSCAR_CAPABILITY_CHAT) 2450 if (args->type & OSCAR_CAPABILITY_CHAT)
2421 { 2451 {
2422 char *encoding, *utf8name, *tmp; 2452 char *encoding, *utf8name, *tmp;
2423 GHashTable *components; 2453 GHashTable *components;
2424 2454
2455 // purple_debug_info("yaz oscar", "chat request %s\n", args->msg);
2425 if (!args->info.chat.roominfo.name || !args->info.chat.roominfo.exchange) { 2456 if (!args->info.chat.roominfo.name || !args->info.chat.roominfo.exchange) {
2426 g_free(message); 2457 g_free(message);
2427 return 1; 2458 return 1;
2428 } 2459 }
2429 encoding = args->encoding ? oscar_encoding_extract(args->encoding) : NULL; 2460 encoding = args->encoding ? oscar_encoding_extract(args->encoding) : NULL;
2442 components = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, 2473 components = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
2443 g_free); 2474 g_free);
2444 g_hash_table_replace(components, g_strdup("room"), utf8name); 2475 g_hash_table_replace(components, g_strdup("room"), utf8name);
2445 g_hash_table_replace(components, g_strdup("exchange"), 2476 g_hash_table_replace(components, g_strdup("exchange"),
2446 g_strdup_printf("%d", args->info.chat.roominfo.exchange)); 2477 g_strdup_printf("%d", args->info.chat.roominfo.exchange));
2478 purple_debug_info("yaz oscar", "about to call serv_got_chat_invite\n");
2479 // purple_debug_info("yaz oscar", "name=%s message=%s\n", name ? name : args->info.chat.roominfo.name, message);
2447 serv_got_chat_invite(gc, 2480 serv_got_chat_invite(gc,
2448 utf8name, 2481 utf8name,
2449 userinfo->bn, 2482 userinfo->bn,
2450 message, 2483 message,
2451 components); 2484 components);
2666 * character in whatever encoding the message was sent in. Type 1 2699 * character in whatever encoding the message was sent in. Type 1
2667 * messages are always made up of only one part, so we can easily account 2700 * messages are always made up of only one part, so we can easily account
2668 * for this suck-ass part of the protocol by splitting the string into at 2701 * for this suck-ass part of the protocol by splitting the string into at
2669 * most 1 baby string. 2702 * most 1 baby string.
2670 */ 2703 */
2671 msg1 = g_strsplit(args->msg, "\376", (args->type == 0x01 ? 1 : 0)); 2704 msg1 = g_strsplit(args->msg, "\376", (args->type == 0x01 ? 1 : 0)); // \376 is 0xfe
2672 for (numtoks=0; msg1[numtoks]; numtoks++); 2705 for (numtoks=0; msg1[numtoks]; numtoks++);
2673 msg2 = (gchar **)g_malloc((numtoks+1)*sizeof(gchar *)); 2706 msg2 = (gchar **)g_malloc((numtoks+1)*sizeof(gchar *));
2674 for (i=0; msg1[i]; i++) { 2707 for (i=0; msg1[i]; i++) {
2675 gchar *uin = g_strdup_printf("%u", args->uin); 2708 gchar *uin = g_strdup_printf("%u", args->uin);
2676 2709
4355 static void 4388 static void
4356 purple_odc_send_im(PeerConnection *conn, const char *message, PurpleMessageFlags imflags) 4389 purple_odc_send_im(PeerConnection *conn, const char *message, PurpleMessageFlags imflags)
4357 { 4390 {
4358 GString *msg; 4391 GString *msg;
4359 GString *data; 4392 GString *data;
4360 gchar *tmp; 4393 guint16 charset;
4361 int tmplen;
4362 guint16 charset, charsubset;
4363 GData *attribs; 4394 GData *attribs;
4364 const char *start, *end, *last; 4395 const char *start, *end, *last;
4365 int oscar_id = 0; 4396 int oscar_id = 0;
4366 4397
4367 msg = g_string_new("<HTML><BODY>"); 4398 msg = g_string_new("<HTML><BODY>");
4417 if (last && *last) 4448 if (last && *last)
4418 g_string_append(msg, last); 4449 g_string_append(msg, last);
4419 4450
4420 g_string_append(msg, "</BODY></HTML>"); 4451 g_string_append(msg, "</BODY></HTML>");
4421 4452
4422 /* Convert the message to a good encoding */ 4453 /* iChat and AIM6 use 0x000d to send UTF8.
4423 purple_plugin_oscar_convert_to_best_encoding(conn->od->gc, 4454 moreover, AIM6 persists only to UTF8! --yaz */
4424 conn->bn, msg->str, &tmp, &tmplen, &charset, &charsubset); 4455 charset = AIM_CHARSET_QUIRKUTF8;
4425 g_string_free(msg, TRUE);
4426 msg = g_string_new_len(tmp, tmplen);
4427 g_free(tmp);
4428 4456
4429 /* Append any binary data that we may have */ 4457 /* Append any binary data that we may have */
4430 if (oscar_id) { 4458 if (oscar_id) {
4431 msg = g_string_append_len(msg, data->str, data->len); 4459 msg = g_string_append_len(msg, data->str, data->len);
4432 msg = g_string_append(msg, "</BINARY>"); 4460 msg = g_string_append(msg, "</BINARY>");
4664 int charset = 0; 4692 int charset = 0;
4665 char *encoded = NULL; 4693 char *encoded = NULL;
4666 4694
4667 charset = oscar_charset_check(str); 4695 charset = oscar_charset_check(str);
4668 if (charset == AIM_CHARSET_UNICODE) { 4696 if (charset == AIM_CHARSET_UNICODE) {
4669 encoded = g_convert(str, -1, "UTF-16BE", "UTF-8", NULL, ret_len, NULL); 4697 encoded = g_convert(str, strlen(str), "UTF-16BE", "UTF-8", NULL, ret_len, NULL);
4698 botch_ucs(encoded, *ret_len);
4670 *encoding = "unicode-2-0"; 4699 *encoding = "unicode-2-0";
4671 } else if (charset == AIM_CHARSET_CUSTOM) { 4700 } else if (charset == AIM_CHARSET_CUSTOM) {
4672 encoded = g_convert(str, -1, "ISO-8859-1", "UTF-8", NULL, ret_len, NULL); 4701 encoded = g_convert(str, -1, "ISO-8859-1", "UTF-8", NULL, ret_len, NULL);
4673 *encoding = "iso-8859-1"; 4702 *encoding = "iso-8859-1";
4674 } else { 4703 } else {
5823 charsetstr = "us-ascii"; 5852 charsetstr = "us-ascii";
5824 else if (charset == AIM_CHARSET_UNICODE) 5853 else if (charset == AIM_CHARSET_UNICODE)
5825 charsetstr = "unicode-2-0"; 5854 charsetstr = "unicode-2-0";
5826 else if (charset == AIM_CHARSET_CUSTOM) 5855 else if (charset == AIM_CHARSET_CUSTOM)
5827 charsetstr = "iso-8859-1"; 5856 charsetstr = "iso-8859-1";
5828 aim_chat_send_im(od, c->conn, 0, buf2, len, charsetstr, "en"); 5857 aim_chat_send_im(od, c->conn, 0, buf2, len, charsetstr, "JA");
5829 g_free(buf2); 5858 g_free(buf2);
5830 g_free(buf); 5859 g_free(buf);
5831 5860
5832 return 0; 5861 return 0;
5833 } 5862 }