comparison libpurple/protocols/oscar/oscar.c @ 22424:7b4d39ffd563

Change OSCAR to use UTF-16 rather than UCS-2 conversions. Apparently some iconv implementations (notably win_iconv) are dropping UCS-2 support, under the impression that it is obsolete. For BMP characters, UTF-16 should be bitwise identical to UCS-2, so if OSCAR really *is* UCS-2, this shouldn't hurt anything. If it turns out that OSCAR is capable of UTF-16, then so much the better.
author Ethan Blanton <elb@pidgin.im>
date Fri, 07 Mar 2008 04:01:20 +0000
parents 5762dcb1909c
children c2b9399d95b2 9cd7baa0627e
comparison
equal deleted inserted replaced
22422:361b2e61a219 22424:7b4d39ffd563
243 } 243 }
244 i++; 244 i++;
245 } 245 }
246 246
247 /* 247 /*
248 * Must we send this message as UNICODE (in the UCS-2BE encoding)? 248 * Must we send this message as UNICODE (in the UTF-16BE encoding)?
249 */ 249 */
250 while (utf8[i]) 250 while (utf8[i])
251 { 251 {
252 /* ISO-8859-1 is 0x00-0xbf in the first byte 252 /* ISO-8859-1 is 0x00-0xbf in the first byte
253 * followed by 0xc0-0xc3 in the second */ 253 * followed by 0xc0-0xc3 in the second */
312 { 312 {
313 utf8 = g_convert(text, textlen, "UTF-8", "Windows-1252", NULL, NULL, NULL); 313 utf8 = g_convert(text, textlen, "UTF-8", "Windows-1252", NULL, NULL, NULL);
314 } else if (!g_ascii_strcasecmp(encoding, "unicode-2-0")) { 314 } else if (!g_ascii_strcasecmp(encoding, "unicode-2-0")) {
315 /* Some official ICQ clients are apparently total crack, 315 /* Some official ICQ clients are apparently total crack,
316 * and have been known to save a UTF-8 string converted 316 * and have been known to save a UTF-8 string converted
317 * from the locale character set to UCS-2 (not from UTF-8 317 * from the locale character set to UTF-16 (not from UTF-8
318 * to UCS-2!) in the away message. This hack should find 318 * to UTF-16!) in the away message. This hack should find
319 * and do something (un)reasonable with that, and not 319 * and do something (un)reasonable with that, and not
320 * mess up too much else. */ 320 * mess up too much else. */
321 const gchar *charset = purple_account_get_string(account, "encoding", NULL); 321 const gchar *charset = purple_account_get_string(account, "encoding", NULL);
322 if (charset) { 322 if (charset) {
323 gsize len; 323 gsize len;
324 utf8 = g_convert(text, textlen, charset, "UCS-2BE", &len, NULL, NULL); 324 utf8 = g_convert(text, textlen, charset, "UTF-16BE", &len, NULL, NULL);
325 if (!utf8 || len != textlen || !g_utf8_validate(utf8, -1, NULL)) { 325 if (!utf8 || len != textlen || !g_utf8_validate(utf8, -1, NULL)) {
326 g_free(utf8); 326 g_free(utf8);
327 utf8 = NULL; 327 utf8 = NULL;
328 } else { 328 } else {
329 purple_debug_info("oscar", "Used broken ICQ fallback encoding\n"); 329 purple_debug_info("oscar", "Used broken ICQ fallback encoding\n");
330 } 330 }
331 } 331 }
332 if (!utf8) 332 if (!utf8)
333 utf8 = g_convert(text, textlen, "UTF-8", "UCS-2BE", NULL, NULL, NULL); 333 utf8 = g_convert(text, textlen, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
334 } else if (g_ascii_strcasecmp(encoding, "utf-8")) { 334 } else if (g_ascii_strcasecmp(encoding, "utf-8")) {
335 purple_debug_warning("oscar", "Unrecognized character encoding \"%s\", " 335 purple_debug_warning("oscar", "Unrecognized character encoding \"%s\", "
336 "attempting to convert to UTF-8 anyway\n", encoding); 336 "attempting to convert to UTF-8 anyway\n", encoding);
337 utf8 = g_convert(text, textlen, "UTF-8", encoding, NULL, NULL, NULL); 337 utf8 = g_convert(text, textlen, "UTF-8", encoding, NULL, NULL, NULL);
338 } 338 }
421 421
422 if ((datalen == 0) || (data == NULL)) 422 if ((datalen == 0) || (data == NULL))
423 return NULL; 423 return NULL;
424 424
425 if (charset == AIM_CHARSET_UNICODE) { 425 if (charset == AIM_CHARSET_UNICODE) {
426 charsetstr1 = "UCS-2BE"; 426 charsetstr1 = "UTF-16BE";
427 charsetstr2 = "UTF-8"; 427 charsetstr2 = "UTF-8";
428 } else if (charset == AIM_CHARSET_CUSTOM) { 428 } else if (charset == AIM_CHARSET_CUSTOM) {
429 if ((sourcesn != NULL) && aim_snvalid_icq(sourcesn)) 429 if ((sourcesn != NULL) && aim_snvalid_icq(sourcesn))
430 charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); 430 charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
431 else 431 else
493 493
494 /* 494 /*
495 * If we're sending to an ICQ user, and they are in our 495 * If we're sending to an ICQ user, and they are in our
496 * buddy list, and they are advertising the Unicode 496 * buddy list, and they are advertising the Unicode
497 * capability, and they are online, then attempt to send 497 * capability, and they are online, then attempt to send
498 * as UCS-2BE. 498 * as UTF-16BE.
499 */ 499 */
500 if ((destsn != NULL) && aim_snvalid_icq(destsn)) 500 if ((destsn != NULL) && aim_snvalid_icq(destsn))
501 userinfo = aim_locate_finduserinfo(od, destsn); 501 userinfo = aim_locate_finduserinfo(od, destsn);
502 502
503 if ((userinfo != NULL) && (userinfo->capabilities & OSCAR_CAPABILITY_UNICODE)) 503 if ((userinfo != NULL) && (userinfo->capabilities & OSCAR_CAPABILITY_UNICODE))
504 { 504 {
505 PurpleBuddy *b; 505 PurpleBuddy *b;
506 b = purple_find_buddy(account, destsn); 506 b = purple_find_buddy(account, destsn);
507 if ((b != NULL) && (PURPLE_BUDDY_IS_ONLINE(b))) 507 if ((b != NULL) && (PURPLE_BUDDY_IS_ONLINE(b)))
508 { 508 {
509 *msg = g_convert(from, -1, "UCS-2BE", "UTF-8", NULL, &msglen, NULL); 509 *msg = g_convert(from, -1, "UTF-16BE", "UTF-8", NULL, &msglen, NULL);
510 if (*msg != NULL) 510 if (*msg != NULL)
511 { 511 {
512 *charset = AIM_CHARSET_UNICODE; 512 *charset = AIM_CHARSET_UNICODE;
513 *charsubset = 0x0000; 513 *charsubset = 0x0000;
514 *msglen_int = msglen; 514 *msglen_int = msglen;
536 *msglen_int = msglen; 536 *msglen_int = msglen;
537 return; 537 return;
538 } 538 }
539 539
540 /* 540 /*
541 * Nothing else worked, so send as UCS-2BE. 541 * Nothing else worked, so send as UTF-16BE.
542 */ 542 */
543 *msg = g_convert(from, -1, "UCS-2BE", "UTF-8", NULL, &msglen, &err); 543 *msg = g_convert(from, -1, "UTF-16BE", "UTF-8", NULL, &msglen, &err);
544 if (*msg != NULL) { 544 if (*msg != NULL) {
545 *charset = AIM_CHARSET_UNICODE; 545 *charset = AIM_CHARSET_UNICODE;
546 *charsubset = 0x0000; 546 *charsubset = 0x0000;
547 *msglen_int = msglen; 547 *msglen_int = msglen;
548 return; 548 return;
4434 int charset = 0; 4434 int charset = 0;
4435 char *encoded = NULL; 4435 char *encoded = NULL;
4436 4436
4437 charset = oscar_charset_check(str); 4437 charset = oscar_charset_check(str);
4438 if (charset == AIM_CHARSET_UNICODE) { 4438 if (charset == AIM_CHARSET_UNICODE) {
4439 encoded = g_convert(str, -1, "UCS-2BE", "UTF-8", NULL, ret_len, NULL); 4439 encoded = g_convert(str, -1, "UTF-16BE", "UTF-8", NULL, ret_len, NULL);
4440 *encoding = "unicode-2-0"; 4440 *encoding = "unicode-2-0";
4441 } else if (charset == AIM_CHARSET_CUSTOM) { 4441 } else if (charset == AIM_CHARSET_CUSTOM) {
4442 encoded = g_convert(str, -1, "ISO-8859-1", "UTF-8", NULL, ret_len, NULL); 4442 encoded = g_convert(str, -1, "ISO-8859-1", "UTF-8", NULL, ret_len, NULL);
4443 *encoding = "iso-8859-1"; 4443 *encoding = "iso-8859-1";
4444 } else { 4444 } else {