Mercurial > pidgin.yaz
comparison libpurple/protocols/oscar/oscar.c @ 22424:7b4d39ffd563
Change OSCAR to use UTF-16 rather than UCS-2 conversions. Apparently
some iconv implementations (notably win_iconv) are dropping UCS-2
support, due to the impression that it is obsolete. For BMP
characters, UTF-16 should be bitwise identical to UCS-2, so if OSCAR
really *is* UCS-2, this shouldn't hurt anything. If it turns out it's
capable of UTF-16, well, then, so much the better.
author | Ethan Blanton <elb@pidgin.im> |
---|---|
date | Fri, 07 Mar 2008 04:01:20 +0000 |
parents | 5762dcb1909c |
children | c2b9399d95b2 9cd7baa0627e |
comparison
equal
deleted
inserted
replaced
22422:361b2e61a219 | 22424:7b4d39ffd563 |
---|---|
243 } | 243 } |
244 i++; | 244 i++; |
245 } | 245 } |
246 | 246 |
247 /* | 247 /* |
248 * Must we send this message as UNICODE (in the UCS-2BE encoding)? | 248 * Must we send this message as UNICODE (in the UTF-16BE encoding)? |
249 */ | 249 */ |
250 while (utf8[i]) | 250 while (utf8[i]) |
251 { | 251 { |
252 /* ISO-8859-1 is 0x00-0xbf in the first byte | 252 /* ISO-8859-1 is 0x00-0xbf in the first byte |
253 * followed by 0xc0-0xc3 in the second */ | 253 * followed by 0xc0-0xc3 in the second */ |
312 { | 312 { |
313 utf8 = g_convert(text, textlen, "UTF-8", "Windows-1252", NULL, NULL, NULL); | 313 utf8 = g_convert(text, textlen, "UTF-8", "Windows-1252", NULL, NULL, NULL); |
314 } else if (!g_ascii_strcasecmp(encoding, "unicode-2-0")) { | 314 } else if (!g_ascii_strcasecmp(encoding, "unicode-2-0")) { |
315 /* Some official ICQ clients are apparently total crack, | 315 /* Some official ICQ clients are apparently total crack, |
316 * and have been known to save a UTF-8 string converted | 316 * and have been known to save a UTF-8 string converted |
317 * from the locale character set to UCS-2 (not from UTF-8 | 317 * from the locale character set to UTF-16 (not from UTF-8 |
318 * to UCS-2!) in the away message. This hack should find | 318 * to UTF-16!) in the away message. This hack should find |
319 * and do something (un)reasonable with that, and not | 319 * and do something (un)reasonable with that, and not |
320 * mess up too much else. */ | 320 * mess up too much else. */ |
321 const gchar *charset = purple_account_get_string(account, "encoding", NULL); | 321 const gchar *charset = purple_account_get_string(account, "encoding", NULL); |
322 if (charset) { | 322 if (charset) { |
323 gsize len; | 323 gsize len; |
324 utf8 = g_convert(text, textlen, charset, "UCS-2BE", &len, NULL, NULL); | 324 utf8 = g_convert(text, textlen, charset, "UTF-16BE", &len, NULL, NULL); |
325 if (!utf8 || len != textlen || !g_utf8_validate(utf8, -1, NULL)) { | 325 if (!utf8 || len != textlen || !g_utf8_validate(utf8, -1, NULL)) { |
326 g_free(utf8); | 326 g_free(utf8); |
327 utf8 = NULL; | 327 utf8 = NULL; |
328 } else { | 328 } else { |
329 purple_debug_info("oscar", "Used broken ICQ fallback encoding\n"); | 329 purple_debug_info("oscar", "Used broken ICQ fallback encoding\n"); |
330 } | 330 } |
331 } | 331 } |
332 if (!utf8) | 332 if (!utf8) |
333 utf8 = g_convert(text, textlen, "UTF-8", "UCS-2BE", NULL, NULL, NULL); | 333 utf8 = g_convert(text, textlen, "UTF-8", "UTF-16BE", NULL, NULL, NULL); |
334 } else if (g_ascii_strcasecmp(encoding, "utf-8")) { | 334 } else if (g_ascii_strcasecmp(encoding, "utf-8")) { |
335 purple_debug_warning("oscar", "Unrecognized character encoding \"%s\", " | 335 purple_debug_warning("oscar", "Unrecognized character encoding \"%s\", " |
336 "attempting to convert to UTF-8 anyway\n", encoding); | 336 "attempting to convert to UTF-8 anyway\n", encoding); |
337 utf8 = g_convert(text, textlen, "UTF-8", encoding, NULL, NULL, NULL); | 337 utf8 = g_convert(text, textlen, "UTF-8", encoding, NULL, NULL, NULL); |
338 } | 338 } |
421 | 421 |
422 if ((datalen == 0) || (data == NULL)) | 422 if ((datalen == 0) || (data == NULL)) |
423 return NULL; | 423 return NULL; |
424 | 424 |
425 if (charset == AIM_CHARSET_UNICODE) { | 425 if (charset == AIM_CHARSET_UNICODE) { |
426 charsetstr1 = "UCS-2BE"; | 426 charsetstr1 = "UTF-16BE"; |
427 charsetstr2 = "UTF-8"; | 427 charsetstr2 = "UTF-8"; |
428 } else if (charset == AIM_CHARSET_CUSTOM) { | 428 } else if (charset == AIM_CHARSET_CUSTOM) { |
429 if ((sourcesn != NULL) && aim_snvalid_icq(sourcesn)) | 429 if ((sourcesn != NULL) && aim_snvalid_icq(sourcesn)) |
430 charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); | 430 charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING); |
431 else | 431 else |
493 | 493 |
494 /* | 494 /* |
495 * If we're sending to an ICQ user, and they are in our | 495 * If we're sending to an ICQ user, and they are in our |
496 * buddy list, and they are advertising the Unicode | 496 * buddy list, and they are advertising the Unicode |
497 * capability, and they are online, then attempt to send | 497 * capability, and they are online, then attempt to send |
498 * as UCS-2BE. | 498 * as UTF-16BE. |
499 */ | 499 */ |
500 if ((destsn != NULL) && aim_snvalid_icq(destsn)) | 500 if ((destsn != NULL) && aim_snvalid_icq(destsn)) |
501 userinfo = aim_locate_finduserinfo(od, destsn); | 501 userinfo = aim_locate_finduserinfo(od, destsn); |
502 | 502 |
503 if ((userinfo != NULL) && (userinfo->capabilities & OSCAR_CAPABILITY_UNICODE)) | 503 if ((userinfo != NULL) && (userinfo->capabilities & OSCAR_CAPABILITY_UNICODE)) |
504 { | 504 { |
505 PurpleBuddy *b; | 505 PurpleBuddy *b; |
506 b = purple_find_buddy(account, destsn); | 506 b = purple_find_buddy(account, destsn); |
507 if ((b != NULL) && (PURPLE_BUDDY_IS_ONLINE(b))) | 507 if ((b != NULL) && (PURPLE_BUDDY_IS_ONLINE(b))) |
508 { | 508 { |
509 *msg = g_convert(from, -1, "UCS-2BE", "UTF-8", NULL, &msglen, NULL); | 509 *msg = g_convert(from, -1, "UTF-16BE", "UTF-8", NULL, &msglen, NULL); |
510 if (*msg != NULL) | 510 if (*msg != NULL) |
511 { | 511 { |
512 *charset = AIM_CHARSET_UNICODE; | 512 *charset = AIM_CHARSET_UNICODE; |
513 *charsubset = 0x0000; | 513 *charsubset = 0x0000; |
514 *msglen_int = msglen; | 514 *msglen_int = msglen; |
536 *msglen_int = msglen; | 536 *msglen_int = msglen; |
537 return; | 537 return; |
538 } | 538 } |
539 | 539 |
540 /* | 540 /* |
541 * Nothing else worked, so send as UCS-2BE. | 541 * Nothing else worked, so send as UTF-16BE. |
542 */ | 542 */ |
543 *msg = g_convert(from, -1, "UCS-2BE", "UTF-8", NULL, &msglen, &err); | 543 *msg = g_convert(from, -1, "UTF-16BE", "UTF-8", NULL, &msglen, &err); |
544 if (*msg != NULL) { | 544 if (*msg != NULL) { |
545 *charset = AIM_CHARSET_UNICODE; | 545 *charset = AIM_CHARSET_UNICODE; |
546 *charsubset = 0x0000; | 546 *charsubset = 0x0000; |
547 *msglen_int = msglen; | 547 *msglen_int = msglen; |
548 return; | 548 return; |
4434 int charset = 0; | 4434 int charset = 0; |
4435 char *encoded = NULL; | 4435 char *encoded = NULL; |
4436 | 4436 |
4437 charset = oscar_charset_check(str); | 4437 charset = oscar_charset_check(str); |
4438 if (charset == AIM_CHARSET_UNICODE) { | 4438 if (charset == AIM_CHARSET_UNICODE) { |
4439 encoded = g_convert(str, -1, "UCS-2BE", "UTF-8", NULL, ret_len, NULL); | 4439 encoded = g_convert(str, -1, "UTF-16BE", "UTF-8", NULL, ret_len, NULL); |
4440 *encoding = "unicode-2-0"; | 4440 *encoding = "unicode-2-0"; |
4441 } else if (charset == AIM_CHARSET_CUSTOM) { | 4441 } else if (charset == AIM_CHARSET_CUSTOM) { |
4442 encoded = g_convert(str, -1, "ISO-8859-1", "UTF-8", NULL, ret_len, NULL); | 4442 encoded = g_convert(str, -1, "ISO-8859-1", "UTF-8", NULL, ret_len, NULL); |
4443 *encoding = "iso-8859-1"; | 4443 *encoding = "iso-8859-1"; |
4444 } else { | 4444 } else { |