Mercurial > pidgin
changeset 28012:c06114f3d58d
* Change yahoo_html_to_codes() a little to hopefully be more straightforward
and more similar to yahoo_codes_to_html()
* Add documentation to yahoo_html_to_codes() that explains the differences
between the encoded text that we send and the encoded text sent by yahoo
* Enable the test cases for yahoo_html_to_codes()
author | Mark Doliner <mark@kingant.net> |
---|---|
date | Wed, 19 Aug 2009 22:01:10 +0000 |
parents | bbb9e0ea67fe |
children | b289449f3e9f cf533027c3be |
files | libpurple/protocols/yahoo/libymsg.h libpurple/protocols/yahoo/util.c libpurple/tests/test_yahoo_util.c |
diffstat | 3 files changed, 134 insertions(+), 150 deletions(-) [+] |
line wrap: on
line diff
--- a/libpurple/protocols/yahoo/libymsg.h Wed Aug 19 18:54:54 2009 +0000 +++ b/libpurple/protocols/yahoo/libymsg.h Wed Aug 19 22:01:10 2009 +0000 @@ -280,6 +280,27 @@ void yahoo_init_colorht(void); void yahoo_dest_colorht(void); char *yahoo_codes_to_html(const char *x); + +/** + * This function takes a normal HTML message and converts it to the message + * format used by Yahoo, which uses a frankensteinish combination of ANSI + * escape codes and broken HTML. + * + * It results in slightly different output than would be sent by official + * Yahoo clients. The two main differences are: + * + * 1. We always close all tags, whereas official Yahoo clients leave tags + * dangling open at the end of each message (and the client treats them + * as closed). + * 2. We always close inner tags first before closing outter tags. + * + * For example, if you want to send this message: + * <b> bold <i> bolditalic </i></b><i> italic </i> + * Official Yahoo clients would send: + * ESC[1m bold ESC[2m bolditalic ESC[x1m italic + * But we will send: + * ESC[1m bold ESC[2m bolditalic ESC[x2mESC[x1mESC[2m italic ESC[x2m + */ char *yahoo_html_to_codes(const char *src); gboolean
--- a/libpurple/protocols/yahoo/util.c Wed Aug 19 18:54:54 2009 +0000 +++ b/libpurple/protocols/yahoo/util.c Wed Aug 19 22:01:10 2009 +0000 @@ -669,7 +669,7 @@ #define POINT_SIZE(x) (_point_sizes [MIN ((x > 0 ? x : 1), MAX_FONT_SIZE) - 1]) static const gint _point_sizes [] = { 8, 10, 12, 14, 20, 30, 40 }; -enum fatype +enum fontattr_type { FATYPE_SIZE, FATYPE_COLOR, @@ -679,7 +679,7 @@ typedef struct { - enum fatype type; + enum fontattr_type type; union { int size; char *color; @@ -688,6 +688,17 @@ } u; } fontattr; +typedef struct +{ + gboolean bold; + gboolean italic; + gboolean underline; + gboolean in_link; + int font_size; + char *font_face; + char *font_color; +} CurrentMsgState; + static void fontattr_free(fontattr *f) { if (f->type == FATYPE_COLOR) @@ -876,167 +887,124 @@ GString *dest; char *esc; GQueue *ftattr = NULL; - gboolean no_more_specials = FALSE; + gboolean no_more_gt_brackets = FALSE; + gchar *tag, *tag_name; + gboolean is_closing_tag; + CurrentMsgState current_state; + + bzero(&current_state, sizeof(current_state)); src_len = strlen(src); dest = g_string_sized_new(src_len); for (i = 0; i < src_len; i++) { - - if (src[i] == '<' && !no_more_specials) { + if (src[i] == '<' && !no_more_gt_brackets) { + /* The start of an HTML tag */ j = i; - while (1) { - j++; - - if (j >= src_len) { /* no '>' */ - g_string_append_c(dest, src[i]); - no_more_specials = TRUE; - break; - } + while (j++ < src_len) { + if (src[j] != '>') { + if (src[j] == '"') { + /* We're inside a quoted attribute value. Skip to the end */ + j++; + while (j != src_len && src[j] != '"') + j++; + } else if (src[j] == '\'') { + /* We're inside a quoted attribute value. Skip to the end */ + j++; + while (j != src_len && src[j] != '\'') + j++; + } + if (j != src_len) + /* Keep looking for the end of this tag */ + continue; - if (src[j] == '<') { - /* FIXME: This doesn't convert outgoing entities. - * However, I suspect this case may never - * happen anymore because of the entities. 
- */ - g_string_append_len(dest, &src[i], j - i); - i = j - 1; - if (ftattr) { - fontattr *f; - - while ((f = g_queue_pop_head(ftattr))) - fontattr_free(f); - g_queue_free(ftattr); - ftattr = NULL; - } + /* This < has no corresponding > */ + g_string_append_c(dest, src[i]); + no_more_gt_brackets = TRUE; break; } - if (src[j] == ' ') { - if (!g_ascii_strncasecmp(&src[i+1], "BODY", j - i - 1)) { - char *t = strchr(&src[j], '>'); - if (!t) { - g_string_append(dest, &src[i]); + tag = g_strndup(src + i, j - i + 1); + tag_name = yahoo_markup_get_tag_name(tag, &is_closing_tag); + + if (g_str_equal(tag_name, "a")) { + j += 7; + g_string_append(dest, "\033[lm"); + if (purple_str_has_prefix(src + j, "mailto:")) + j += sizeof("mailto:") - 1; + while (1) { + g_string_append_c(dest, src[j]); + if (++j >= src_len) { i = src_len; break; - } else { - i = t - src; + } + if (src[j] == '"') { + g_string_append(dest, "\033[xlm"); + while (1) { + if (++j >= src_len) { + i = src_len; + break; + } + if (!g_ascii_strncasecmp(&src[j], "</A>", 4)) { + j += 3; + break; + } + } + i = j; break; } - } else if (!g_ascii_strncasecmp(&src[i+1], "A HREF=\"", j - i - 1)) { - j += 7; - g_string_append(dest, "\033[lm"); - if (purple_str_has_prefix(src + j, "mailto:")) - j += sizeof("mailto:") - 1; - while (1) { - g_string_append_c(dest, src[j]); - if (++j >= src_len) { - i = src_len; - break; - } - if (src[j] == '"') { - g_string_append(dest, "\033[xlm"); - while (1) { - if (++j >= src_len) { - i = src_len; - break; - } - if (!g_ascii_strncasecmp(&src[j], "</A>", 4)) { - j += 3; - break; - } - } - i = j; - break; - } - } - } else if (!g_ascii_strncasecmp(&src[i+1], "SPAN", j - i - 1)) { /* drop span tags */ - while (1) { - if (++j >= src_len) { - g_string_append(dest, &src[i]); - i = src_len; - break; - } - if (src[j] == '>') { - i = j; - break; + } + + } else if (g_str_equal(tag_name, "font")) { + _parse_font_tag(src, dest, &i, &j, src_len, &colors, &tags, ftattr); + } else if (g_str_equal(tag_name, 
"b")) { + g_string_append(dest, "\033[1m"); + current_state.bold = TRUE; + } else if (g_str_equal(tag_name, "/b")) { + if (current_state.bold) { + g_string_append(dest, "\033[x1m"); + current_state.bold = FALSE; + } + } else if (g_str_equal(tag_name, "i")) { + current_state.italic = TRUE; + g_string_append(dest, "\033[2m"); + } else if (g_str_equal(tag_name, "/i")) { + if (current_state.italic) { + g_string_append(dest, "\033[x2m"); + current_state.italic = FALSE; + } + } else if (g_str_equal(tag_name, "u")) { + current_state.underline = TRUE; + g_string_append(dest, "\033[4m"); + } else if (g_str_equal(tag_name, "/u")) { + if (current_state.underline) { + g_string_append(dest, "\033[x4m"); + current_state.underline = FALSE; + } + } else if (g_str_equal(tag_name, "/a")) { + g_string_append(dest, "\033[xlm"); + } else if (g_str_equal(tag_name, "br")) { + g_string_append_c(dest, '\n'); + } else if (g_str_equal(tag_name, "/font")) { + if (tags != NULL) { + char *etag = tags->data; + tags = g_slist_delete_link(tags, tags); + g_string_append(dest, etag); + if (g_str_equal(etag, "</font>")) { + if (colors != NULL) { + g_free(colors->data); + colors = g_slist_delete_link(colors, colors); } } - } else if (g_ascii_strncasecmp(&src[i+1], "FONT", j - i - 1)) { /* not interested! */ - while (1) { - if (++j >= src_len) { - g_string_append(dest, &src[i]); - i = src_len; - break; - } - if (src[j] == '>') { - g_string_append_len(dest, &src[i], j - i + 1); - i = j; - break; - } - } - } else { /* yay we have a font tag */ - _parse_font_tag(src, dest, &i, &j, src_len, &colors, &tags, ftattr); + g_free(etag); } - - break; } - if (src[j] == '>') { - /* This has some problems like the FIXME for the - * '<' case. and like that case, I suspect the case - * that this has problems is won't happen anymore anyway. 
- */ - int sublen = j - i - 1; - - if (sublen) { - if (!g_ascii_strncasecmp(&src[i+1], "B", sublen)) { - g_string_append(dest, "\033[1m"); - } else if (!g_ascii_strncasecmp(&src[i+1], "/B", sublen)) { - g_string_append(dest, "\033[x1m"); - } else if (!g_ascii_strncasecmp(&src[i+1], "I", sublen)) { - g_string_append(dest, "\033[2m"); - } else if (!g_ascii_strncasecmp(&src[i+1], "/I", sublen)) { - g_string_append(dest, "\033[x2m"); - } else if (!g_ascii_strncasecmp(&src[i+1], "U", sublen)) { - g_string_append(dest, "\033[4m"); - } else if (!g_ascii_strncasecmp(&src[i+1], "/U", sublen)) { - g_string_append(dest, "\033[x4m"); - } else if (!g_ascii_strncasecmp(&src[i+1], "/A", sublen)) { - g_string_append(dest, "\033[xlm"); - } else if (!g_ascii_strncasecmp(&src[i+1], "BR", sublen)) { - g_string_append_c(dest, '\n'); - } else if (!g_ascii_strncasecmp(&src[i+1], "/BODY", sublen)) { - /* mmm, </body> tags. *BURP* */ - } else if (!g_ascii_strncasecmp(&src[i+1], "/SPAN", sublen)) { - /* </span> tags. dangerously close to </spam> */ - } else if (!g_ascii_strncasecmp(&src[i+1], "/FONT", sublen) && tags != NULL) { - char *etag; - - etag = tags->data; - tags = g_slist_delete_link(tags, tags); - if (etag) { - g_string_append(dest, etag); - if (!strcmp(etag, "</font>")) { - if (colors != NULL) { - g_free(colors->data); - colors = g_slist_delete_link(colors, colors); - } - } - g_free(etag); - } - } else { - g_string_append_len(dest, &src[i], j - i + 1); - } - } else { - g_string_append_len(dest, &src[i], j - i + 1); - } - - i = j; - break; - } - + i = j; + g_free(tag); + g_free(tag_name); + break; } } else {
--- a/libpurple/tests/test_yahoo_util.c Wed Aug 19 18:54:54 2009 +0000 +++ b/libpurple/tests/test_yahoo_util.c Wed Aug 19 22:01:10 2009 +0000 @@ -104,7 +104,6 @@ } END_TEST -#if 0 START_TEST(test_html_to_codes) { assert_string_equal_free("plain", @@ -129,7 +128,6 @@ yahoo_html_to_codes("plain &")); /* bold/italic/underline */ - // MARK: This isn't correct. Should not have the closing bold escape code assert_string_equal_free("\x1B[1mbold\x1B[x1m", yahoo_html_to_codes("<b>bold</b>")); assert_string_equal_free("\x1B[2mitalic\x1B[x2m", @@ -140,13 +138,12 @@ yahoo_html_to_codes("no</u> markup")); assert_string_equal_free("\x1B[1mbold\x1B[x1m \x1B[2mitalic\x1B[x2m \x1B[4munderline\x1B[x4m", yahoo_html_to_codes("<b>bold</b> <i>italic</i> <u>underline</u>")); - assert_string_equal_free("\x1B[1mbold \x1B[2mbolditalic\x1B[x1m italic\x1B[x1m", + assert_string_equal_free("\x1B[1mbold \x1B[2mbolditalic\x1B[x2m\x1B[x1m\x1B[2m italic\x1B[x2m", yahoo_html_to_codes("<b>bold <i>bolditalic</i></b><i> italic</i>")); - assert_string_equal_free("\x1B[1mbold \x1B[2mbolditalic\x1B[x1m \x1B[4mitalicunderline", + assert_string_equal_free("\x1B[1mbold \x1B[2mbolditalic\x1B[x2m\x1B[x1m\x1B[2m \x1B[4mitalicunderline\x1B[x4m\x1B[x2m", yahoo_html_to_codes("<b>bold <i>bolditalic</i></b><i> <u>italicunderline</u></i>")); } END_TEST -#endif Suite * yahoo_util_suite(void) @@ -161,11 +158,9 @@ tcase_add_test(tc, test_codes_to_html); suite_add_tcase(s, tc); -#if 0 tc = tcase_create("Convert IM from HTML to network format"); tcase_add_test(tc, test_html_to_codes); suite_add_tcase(s, tc); -#endif return s; }