Mercurial > pidgin
comparison src/util.c @ 12880:bad785371fa5
[gaim-migrate @ 15232]
Reworks gaim_utf8_ncr_decode to handle hex and octal as well as decimal formats. I tested this a bit with Sametime, and it seems to be working fine. None of our target platforms are weird enough to be missing strtoul, I hope...
committer: Tailor Script <tailor@pidgin.im>
author | Christopher O'Brien <siege@pidgin.im> |
---|---|
date | Sun, 15 Jan 2006 05:50:28 +0000 |
parents | 547c199072c8 |
children | 4229503f1cd9 |
comparison
equal
deleted
inserted
replaced
12879:f8748df5c17a | 12880:bad785371fa5 |
---|---|
3450 return g_string_free(workstr, FALSE); | 3450 return g_string_free(workstr, FALSE); |
3451 } | 3451 } |
3452 | 3452 |
3453 | 3453 |
3454 char * | 3454 char * |
3455 gaim_utf8_ncr_encode(const char *in) | 3455 gaim_utf8_ncr_encode(const char *str) |
3456 { | 3456 { |
3457 GString *out; | 3457 GString *out; |
3458 | 3458 |
3459 g_return_val_if_fail(in != NULL, NULL); | 3459 g_return_val_if_fail(str != NULL, NULL); |
3460 g_return_val_if_fail(g_utf8_validate(in, -1, NULL), NULL); | 3460 g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL); |
3461 | 3461 |
3462 out = g_string_new(""); | 3462 out = g_string_new(""); |
3463 | 3463 |
3464 for(; *in; in = g_utf8_next_char(in)) { | 3464 for(; *str; str = g_utf8_next_char(str)) { |
3465 gunichar wc = g_utf8_get_char(in); | 3465 gunichar wc = g_utf8_get_char(str); |
3466 | 3466 |
3467 if(wc >= 0x80) { /* super simple check. hopefully not too wrong. */ | 3467 /* super simple check. hopefully not too wrong. */ |
3468 if(wc >= 0x80) { | |
3468 g_string_append_printf(out, "&#%u;", (guint32) wc); | 3469 g_string_append_printf(out, "&#%u;", (guint32) wc); |
3469 } else { | 3470 } else { |
3470 g_string_append_unichar(out, wc); | 3471 g_string_append_unichar(out, wc); |
3471 } | 3472 } |
3472 } | 3473 } |
3474 return g_string_free(out, FALSE); | 3475 return g_string_free(out, FALSE); |
3475 } | 3476 } |
3476 | 3477 |
3477 | 3478 |
3478 char * | 3479 char * |
3479 gaim_utf8_ncr_decode(const char *in) | 3480 gaim_utf8_ncr_decode(const char *str) |
3480 { | 3481 { |
3481 GString *out; | 3482 GString *out; |
3482 int i; | 3483 char *buf, *b; |
3483 | 3484 |
3484 g_return_val_if_fail(in != NULL, NULL); | 3485 g_return_val_if_fail(str != NULL, NULL); |
3485 g_return_val_if_fail(g_utf8_validate(in, -1, NULL), NULL); | 3486 g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL); |
3486 | 3487 |
3488 buf = (char *) str; | |
3487 out = g_string_new(""); | 3489 out = g_string_new(""); |
3488 | 3490 |
3489 /** @todo doesn't this break with hex formats? */ | 3491 while( (b = strstr(buf, "&#")) ) { |
3490 for (i = 0; in[i]; i += 1) { | 3492 gunichar wc; |
3491 gboolean ncr_found_p = FALSE; | 3493 int base = 0; |
3492 if (in[i] == '&' && in[i + 1] == '#' && isdigit(in[i + 2])) { | 3494 |
3493 gunichar wc; | 3495 /* append everything leading up to the &# */ |
3494 int j; | 3496 g_string_append_len(out, buf, b-buf); |
3495 for (wc = 0, j = i + 2; isdigit(in[j]); j += 1) { | 3497 |
3496 wc *= 10; | 3498 b += 2; /* skip past the &# */ |
3497 wc += in[j] - '0'; | 3499 |
3498 } | 3500 /* strtoul will handle 0x prefix as hex, but not x */ |
3499 if (in[j] == ';') { /* Technically not completely correct */ | 3501 if(*b == 'x' || *b == 'X') |
3500 g_string_append_unichar(out, wc); | 3502 base = 16; |
3501 i = j; | 3503 |
3502 ncr_found_p = TRUE; | 3504 /* advances buf to the end of the ncr segment */ |
3503 } | 3505 wc = (gunichar) strtoul(b, &buf, base); |
3504 } | 3506 |
3505 if (!ncr_found_p) { | 3507 /* this mimics the previous impl of ncr_decode */ |
3506 g_string_append_c(out, in[i]); | 3508 if(*buf == ';') { |
3507 } | 3509 g_string_append_unichar(out, wc); |
3508 } | 3510 buf++; |
3511 } | |
3512 } | |
3513 | |
3514 /* append whatever's left */ | |
3515 g_string_append(out, buf); | |
3516 | |
3509 return g_string_free(out, FALSE); | 3517 return g_string_free(out, FALSE); |
3510 } | 3518 } |
3519 | |
3511 | 3520 |
3512 int | 3521 int |
3513 gaim_utf8_strcasecmp(const char *a, const char *b) | 3522 gaim_utf8_strcasecmp(const char *a, const char *b) |
3514 { | 3523 { |
3515 char *a_norm = NULL; | 3524 char *a_norm = NULL; |