# HG changeset patch # User Christopher O'Brien # Date 1137304228 0 # Node ID bad785371fa50d66b5b474031679cee9c1a0ff32 # Parent f8748df5c17a0cf4d0814c2976ba24f2e489c46d [gaim-migrate @ 15232] reworks gaim_utf8_ncr_decode to handle hex and octal as well as decimal formats. I tested this a bit with sametime, and it seems to be working fine. None of our target platforms are weird and are missing strtoul, I hope... committer: Tailor Script diff -r f8748df5c17a -r bad785371fa5 src/util.c --- a/src/util.c Sun Jan 15 03:53:36 2006 +0000 +++ b/src/util.c Sun Jan 15 05:50:28 2006 +0000 @@ -3452,19 +3452,20 @@ char * -gaim_utf8_ncr_encode(const char *in) +gaim_utf8_ncr_encode(const char *str) { GString *out; - g_return_val_if_fail(in != NULL, NULL); - g_return_val_if_fail(g_utf8_validate(in, -1, NULL), NULL); + g_return_val_if_fail(str != NULL, NULL); + g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL); out = g_string_new(""); - for(; *in; in = g_utf8_next_char(in)) { - gunichar wc = g_utf8_get_char(in); - - if(wc >= 0x80) { /* super simple check. hopefully not too wrong. */ + for(; *str; str = g_utf8_next_char(str)) { + gunichar wc = g_utf8_get_char(str); + + /* super simple check. hopefully not too wrong. */ + if(wc >= 0x80) { g_string_append_printf(out, "&#%u;", (guint32) wc); } else { g_string_append_unichar(out, wc); @@ -3476,39 +3477,47 @@ char * -gaim_utf8_ncr_decode(const char *in) +gaim_utf8_ncr_decode(const char *str) { GString *out; - int i; - - g_return_val_if_fail(in != NULL, NULL); - g_return_val_if_fail(g_utf8_validate(in, -1, NULL), NULL); - + char *buf, *b; + + g_return_val_if_fail(str != NULL, NULL); + g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL); + + buf = (char *) str; out = g_string_new(""); - /** @todo doesn't this break with hex formats? */ - for (i = 0; in[i]; i += 1) { - gboolean ncr_found_p = FALSE; - if (in[i] == '&' && in[i + 1] == '#' && isdigit(in[i + 2])) { - gunichar wc; - int j; - for (wc = 0, j = i + 2; isdigit(in[j]); j += 1) { - wc *= 10; - wc += in[j] - '0'; - } - if (in[j] == ';') { /* Technically not completely correct */ - g_string_append_unichar(out, wc); - i = j; - ncr_found_p = TRUE; - } - } - if (!ncr_found_p) { - g_string_append_c(out, in[i]); + while( (b = strstr(buf, "&#")) ) { + gunichar wc; + int base = 0; + + /* append everything leading up to the &# */ + g_string_append_len(out, buf, b-buf); + + b += 2; /* skip past the &# */ + + /* strtoul will handle 0x prefix as hex, but not x */ + if(*b == 'x' || *b == 'X') + base = 16; + + /* advances buf to the end of the ncr segment */ + wc = (gunichar) strtoul(b, &buf, base); + + /* this mimics the previous impl of ncr_decode */ + if(*buf == ';') { + g_string_append_unichar(out, wc); + buf++; } } + + /* append whatever's left */ + g_string_append(out, buf); + return g_string_free(out, FALSE); } + int gaim_utf8_strcasecmp(const char *a, const char *b) {