# HG changeset patch # User Yoshiki Yazawa # Date 1216752839 -32400 # Node ID e67b0231ba56f118cb9a6e0848ea7ce794db1f62 # Parent cae8d5dd24d02cda3fc20cd7b03cabe6c7468e1b completely re-wrote strip_html_markup(). new function permits non-tag strings surrounded by <>. diff -r cae8d5dd24d0 -r e67b0231ba56 pidgin-twitter.c --- a/pidgin-twitter.c Tue Jul 22 20:00:03 2008 +0900 +++ b/pidgin-twitter.c Wed Jul 23 03:53:59 2008 +0900 @@ -39,163 +39,6 @@ /* functions */ /*************/ -/* this function is a modified clone of purple_markup_strip_html() */ -static char * -strip_html_markup(const char *str) -{ - int i, j, k, entlen; - gboolean visible = TRUE; - gboolean closing_td_p = FALSE; - gchar *str2; - const gchar *cdata_close_tag = NULL, *ent; - gchar *href = NULL; - int href_st = 0; - - if(!str) - return NULL; - - str2 = g_strdup(str); - - for (i = 0, j = 0; str2[i]; i++) - { - if (str2[i] == '<') - { - if (cdata_close_tag) - { - /* Note: Don't even assume any other tag is a tag in CDATA */ - if (g_ascii_strncasecmp(str2 + i, cdata_close_tag, - strlen(cdata_close_tag)) == 0) - { - i += strlen(cdata_close_tag) - 1; - cdata_close_tag = NULL; - } - continue; - } - else if (g_ascii_strncasecmp(str2 + i, "", 5) == 0) - { - closing_td_p = TRUE; - visible = FALSE; - } - else - { - closing_td_p = FALSE; - visible = TRUE; - } - - k = i + 1; - - if(g_ascii_isspace(str2[k])) - visible = TRUE; - else if (str2[k]) - { - /* Scan until we end the tag either implicitly (closed start - * tag) or explicitly, using a sloppy method (i.e., < or > - * inside quoted attributes will screw us up) - */ - while (str2[k] && str2[k] != '<' && str2[k] != '>') - { - k++; - } - - /* If we've got an tag with an href, save the address - * to print later. */ - if (g_ascii_strncasecmp(str2 + i, "", 3) == 0 - || g_ascii_strncasecmp(str2 + i, "", 8) == 0) - { - str2[j++] = '\n'; - } - else if (g_ascii_strncasecmp(str2 + i, "", + "", + "", + "

", + "

", + "
", + "", + "", + "", + "", + "", + "", + "
", + "
", + " */ + gchar *html, *str; /* copied src and str to be returned */ + gchar *vis1, *vis2; /* begin and end of address part */ + gchar *startp; /* starting point marker */ + gchar **tagp; /* tag iterator */ + gchar *tmp, *tmp2; /* scratches */ + + g_return_val_if_fail(src != NULL, NULL); + + const gchar *ptr, *ent; + gchar *ptr2; + gint entlen; + + /* unescape &x; */ + html = g_malloc0(strlen(src)); + ptr2 = html; + for(ptr = src; *ptr; ) { + if(*ptr == '&') { + ent = purple_markup_unescape_entity(ptr, &entlen); + if(ent != NULL) { + while(*ent) { + *ptr2++ = *ent++; + } + ptr += entlen; + } + } + else { + *ptr2++ = *ptr++; + } + } /* for */ + + str = g_strdup("\0"); + + head = html; + tail = head + strlen(html); + startp = head; + +loop: + begin = NULL; + end = NULL; + + if(startp >= tail) { + g_free(html); + return str; + } + + begin = strchr(startp, '<'); + if(begin) + end = strchr(begin + 1, '>'); + if(!end) { + tmp = g_strconcat(str, startp, NULL); + g_free(str); + str = tmp; + g_free(html); + return str; /* no corresponding >, we have done. */ + } + + /* here, both < and > are found */ + /* concatenate leading part to dest */ + tmp = g_strndup(startp, begin - startp); + tmp2 = g_strconcat(str, tmp, NULL); + g_free(tmp); + g_free(str); + str = tmp2; + + /* find tag */ + for(tagp = html_tags; *tagp; tagp++) { + if(!g_ascii_strncasecmp(begin, *tagp, strlen(*tagp))) { + /* we found a valid tag */ + /* if tag is
. */ + startp = end + 1; + goto loop; + } + } /* valid tag */ + } + + /* no valid tag was found: copy */ + tmp = g_strndup(begin, end - begin + 1); + tmp2 = g_strconcat(str, tmp, NULL); + g_free(tmp); + g_free(str); + str = tmp2; + startp = end + 1; + goto loop; +} + /* string utilities */ static void escape(gchar **str) @@ -259,13 +238,20 @@ } static void -strip_markup(gchar **str) +strip_markup(gchar **str, gboolean escape) { - char *plain; + gchar *plain; plain = strip_html_markup(*str); g_free(*str); - *str = plain; + if(escape) { + *str = g_markup_escape_text(plain, -1); + g_free(plain); + } + else { + *str = plain; + } + twitter_debug("result=%s\n", *str); } @@ -481,7 +467,7 @@ if(st->id > lastid && !is_posted_message(st)) { gchar *msg = NULL; - msg = g_strdup_printf("%s: %s\n", st->screen_name, st->text); + msg = g_strdup_printf("%s: %s", st->screen_name, st->text); purple_conv_im_write(conv->u.im, "twitter@twitter.com", msg, @@ -500,7 +486,7 @@ statuseslist = g_list_remove_all(statuseslist, NULL); } -/* status fetching function. it will be called periodically. */ +/* status fetching function. it will be called periodically. */ static gboolean get_status_with_api(gpointer data) { @@ -741,16 +727,16 @@ twitter_ac = is_twitter_account(account, recipient); wassr_ac = is_wassr_account(account, recipient); + /* strip all markups */ + if(twitter_ac || wassr_ac) + strip_markup(buffer, TRUE); + if(wassr_ac) { /* store sending message to address parrot problem */ g_strlcpy(wassr_post, *buffer, WASSR_POST_LEN); twitter_debug("parrot pushed:%s\n", *buffer); } - /* strip all markups */ - if(twitter_ac || wassr_ac) - strip_markup(buffer); - /* return here if the message is not to twitter */ if(!twitter_ac) return FALSE; @@ -961,7 +947,7 @@ } /* strip all markups */ - strip_markup(buffer); //it causes missing of strings surrounded by <> + strip_markup(buffer, TRUE); /* playsound */ if(purple_prefs_get_bool(OPT_PLAYSOUND_SENDER)) {