comparison src/util.c @ 12880:bad785371fa5

[gaim-migrate @ 15232] Reworks gaim_utf8_ncr_decode to handle hex and octal as well as decimal formats. I tested this a bit with sametime, and it seems to be working fine. Hopefully none of our target platforms are weird enough to be missing strtoul...
committer: Tailor Script <tailor@pidgin.im>
author Christopher O'Brien <siege@pidgin.im>
date Sun, 15 Jan 2006 05:50:28 +0000
parents 547c199072c8
children 4229503f1cd9
comparison
equal deleted inserted replaced
12879:f8748df5c17a 12880:bad785371fa5
3450 return g_string_free(workstr, FALSE); 3450 return g_string_free(workstr, FALSE);
3451 } 3451 }
3452 3452
3453 3453
3454 char * 3454 char *
3455 gaim_utf8_ncr_encode(const char *in) 3455 gaim_utf8_ncr_encode(const char *str)
3456 { 3456 {
3457 GString *out; 3457 GString *out;
3458 3458
3459 g_return_val_if_fail(in != NULL, NULL); 3459 g_return_val_if_fail(str != NULL, NULL);
3460 g_return_val_if_fail(g_utf8_validate(in, -1, NULL), NULL); 3460 g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL);
3461 3461
3462 out = g_string_new(""); 3462 out = g_string_new("");
3463 3463
3464 for(; *in; in = g_utf8_next_char(in)) { 3464 for(; *str; str = g_utf8_next_char(str)) {
3465 gunichar wc = g_utf8_get_char(in); 3465 gunichar wc = g_utf8_get_char(str);
3466 3466
3467 if(wc >= 0x80) { /* super simple check. hopefully not too wrong. */ 3467 /* super simple check. hopefully not too wrong. */
3468 if(wc >= 0x80) {
3468 g_string_append_printf(out, "&#%u;", (guint32) wc); 3469 g_string_append_printf(out, "&#%u;", (guint32) wc);
3469 } else { 3470 } else {
3470 g_string_append_unichar(out, wc); 3471 g_string_append_unichar(out, wc);
3471 } 3472 }
3472 } 3473 }
3474 return g_string_free(out, FALSE); 3475 return g_string_free(out, FALSE);
3475 } 3476 }
3476 3477
3477 3478
3478 char * 3479 char *
3479 gaim_utf8_ncr_decode(const char *in) 3480 gaim_utf8_ncr_decode(const char *str)
3480 { 3481 {
3481 GString *out; 3482 GString *out;
3482 int i; 3483 char *buf, *b;
3483 3484
3484 g_return_val_if_fail(in != NULL, NULL); 3485 g_return_val_if_fail(str != NULL, NULL);
3485 g_return_val_if_fail(g_utf8_validate(in, -1, NULL), NULL); 3486 g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL);
3486 3487
3488 buf = (char *) str;
3487 out = g_string_new(""); 3489 out = g_string_new("");
3488 3490
3489 /** @todo doesn't this break with hex formats? */ 3491 while( (b = strstr(buf, "&#")) ) {
3490 for (i = 0; in[i]; i += 1) { 3492 gunichar wc;
3491 gboolean ncr_found_p = FALSE; 3493 int base = 0;
3492 if (in[i] == '&' && in[i + 1] == '#' && isdigit(in[i + 2])) { 3494
3493 gunichar wc; 3495 /* append everything leading up to the &# */
3494 int j; 3496 g_string_append_len(out, buf, b-buf);
3495 for (wc = 0, j = i + 2; isdigit(in[j]); j += 1) { 3497
3496 wc *= 10; 3498 b += 2; /* skip past the &# */
3497 wc += in[j] - '0'; 3499
3498 } 3500 /* strtoul will handle 0x prefix as hex, but not x */
3499 if (in[j] == ';') { /* Technically not completely correct */ 3501 if(*b == 'x' || *b == 'X')
3500 g_string_append_unichar(out, wc); 3502 base = 16;
3501 i = j; 3503
3502 ncr_found_p = TRUE; 3504 /* advances buf to the end of the ncr segment */
3503 } 3505 wc = (gunichar) strtoul(b, &buf, base);
3504 } 3506
3505 if (!ncr_found_p) { 3507 /* this mimics the previous impl of ncr_decode */
3506 g_string_append_c(out, in[i]); 3508 if(*buf == ';') {
3507 } 3509 g_string_append_unichar(out, wc);
3508 } 3510 buf++;
3511 }
3512 }
3513
3514 /* append whatever's left */
3515 g_string_append(out, buf);
3516
3509 return g_string_free(out, FALSE); 3517 return g_string_free(out, FALSE);
3510 } 3518 }
3519
3511 3520
3512 int 3521 int
3513 gaim_utf8_strcasecmp(const char *a, const char *b) 3522 gaim_utf8_strcasecmp(const char *a, const char *b)
3514 { 3523 {
3515 char *a_norm = NULL; 3524 char *a_norm = NULL;