# HG changeset patch # User Yoshiki Yazawa # Date 1210634798 -32400 # Node ID 746ff3b54c103405226702a219a50e2e562c0276 # Parent 4f456c7150edaf390fc5d1a624ff2dda7471ba11 trying another way to strip markups. in this revision, all markups are stripped with strip_html_markup() on sending a message. diff -r 4f456c7150ed -r 746ff3b54c10 pidgin-twitter.c --- a/pidgin-twitter.c Tue May 13 03:42:45 2008 +0900 +++ b/pidgin-twitter.c Tue May 13 08:26:38 2008 +0900 @@ -35,13 +35,11 @@ #define SENDER 1 #define COMMAND 2 #define PSEUDO 3 -#define EXCESS_MARKUP 4 -#define FONT_MARKUP 5 -#define ATMARK_AFTER_A 6 #define PLUGIN_ID "gtk-honeyplanet-pidgin_twitter" #define PLUGIN_NAME "pidgin-twitter" +/* options */ #define OPT_PIDGINTWITTER "/plugins/pidgin_twitter" #define OPT_TRANSLATE_RECIPIENT OPT_PIDGINTWITTER "/translate_recipient" #define OPT_TRANSLATE_SENDER OPT_PIDGINTWITTER "/translate_sender" @@ -55,20 +53,28 @@ #define OPT_COUNTER OPT_PIDGINTWITTER "/counter" #define OPT_SUPPRESS_OOPS OPT_PIDGINTWITTER "/suppress_oops" +/* formats and templates */ #define RECIPIENT_FORMAT "@%s" #define SENDER_FORMAT "%s: " #define DEFAULT_LIST "(list of users: separated with ' ,:;')" #define OOPS_MESSAGE "Oops! Your update was over 140 characters. We sent the short version to your friends (they can view the entire update on the web).
" +/* patterns */ +#define P_RECIPIENT "@([A-Za-z0-9_]+)" +#define P_SENDER "([A-Za-z0-9_]+): " +#define P_COMMAND "^(?:\\s*)([dDfFgGlLmMnNtTwW]{1}\\s+[A-Za-z0-9_]+)(?:\\s*\\Z)" +#define P_PSEUDO "^\\s*(?:[\"#$%&'()*+,\\-./:;<=>?\\[\\\\\\]_`{|}~]|[^\\s\\x21-\\x7E])*([dDfFgGlLmMnNtTwW]{1})(?:\\Z|\\s+|[^\\x21-\\x7E]+\\Z)" + +/* debug macros */ #define twitter_debug(fmt, ...) purple_debug(PURPLE_DEBUG_INFO, PLUGIN_NAME, "%s():%4d: " fmt, __FUNCTION__, (int)__LINE__, ## __VA_ARGS__); #define twitter_error(fmt, ...) purple_debug(PURPLE_DEBUG_ERROR, PLUGIN_NAME, "%s():%4d: " fmt, __FUCTION__, (int)__LINE__, ## __VA_ARGS__); + /* globals */ -static GRegex *regp[7]; +static GRegex *regp[4]; static gboolean suppress_oops = FALSE; /* prototypes */ -static void strip_excess_markup(gchar **str); static void escape(gchar **str); static gboolean sending_im_cb(PurpleAccount *account, char *recipient, char **buffer, void *data); static gboolean eval(const GMatchInfo *match_info, GString *result, gpointer user_data); @@ -92,34 +98,166 @@ static void init_plugin(PurplePlugin *plugin); -/* implementation */ -static void -strip_excess_markup(gchar **str) +/* tentative: this function is a modified clone of purple_markup_strip_html() */ +static char * +strip_html_markup(const char *str) { - gchar *newstr = NULL; + int i, j, k, entlen; + gboolean visible = TRUE; + gboolean closing_td_p = FALSE; + gchar *str2; + const gchar *cdata_close_tag = NULL, *ent; + gchar *href = NULL; + int href_st = 0; + + if(!str) + return NULL; + + str2 = g_strdup(str); - /* strip font tag */ - newstr = g_regex_replace(regp[FONT_MARKUP], *str, -1, 0, "\\1", 0, NULL); - twitter_debug("*str = %s newstr = %s\n", *str, newstr); - g_free(*str); - *str = newstr; + for (i = 0, j = 0; str2[i]; i++) + { + if (str2[i] == '<') + { + if (cdata_close_tag) + { + /* Note: Don't even assume any other tag is a tag in CDATA */ + if (g_ascii_strncasecmp(str2 + i, cdata_close_tag, + strlen(cdata_close_tag)) == 0) + { + i += 
strlen(cdata_close_tag) - 1; + cdata_close_tag = NULL; + } + continue; + } + else if (g_ascii_strncasecmp(str2 + i, "", 5) == 0) + { + closing_td_p = TRUE; + visible = FALSE; + } + else + { + closing_td_p = FALSE; + visible = TRUE; + } + + k = i + 1; + + if(g_ascii_isspace(str2[k])) + visible = TRUE; + else if (str2[k]) + { + /* Scan until we end the tag either implicitly (closed start + * tag) or explicitly, using a sloppy method (i.e., < or > + * inside quoted attributes will screw us up) + */ + while (str2[k] && str2[k] != '<' && str2[k] != '>') + { + k++; + } - /* move @ prior to anchor tag */ - newstr = g_regex_replace(regp[ATMARK_AFTER_A], *str, -1, 0, "@\\1", 0, NULL); - twitter_debug("*str = %s newstr = %s\n", *str, newstr); - g_free(*str); - *str = newstr; + /* If we've got an tag with an href, save the address + * to print later. */ + if (g_ascii_strncasecmp(str2 + i, "", 3) == 0 + || g_ascii_strncasecmp(str2 + i, "", 8) == 0) + { + str2[j++] = '\n'; + } + else if (g_ascii_strncasecmp(str2 + i, " 140 && utflen <= 140) suppress_oops = TRUE; @@ -222,7 +373,7 @@ &which, // user data NULL); // error handler - twitter_debug("*str = %s newstr = %s\n", *str, newstr); + twitter_debug("which = %d *str = %s newstr = %s\n", which, *str, newstr); g_free(*str); *str = newstr; @@ -272,10 +423,14 @@ writing_im_cb(PurpleAccount *account, char *sender, char **buffer, PurpleConversation *conv, int *flags, void *data) { + twitter_debug("called\n"); + /* check if the message is from twitter */ if(!is_twitter_account(account, sender)) return FALSE; + /* strip all markups */ + strip_markup(buffer); /* playsound */ if(purple_prefs_get_bool(OPT_PLAYSOUND_SENDER)) { @@ -285,9 +440,6 @@ playsound(buffer, RECIPIENT); } - /* strip excess markup */ - strip_excess_markup(buffer); - /* translate */ if(purple_prefs_get_bool(OPT_TRANSLATE_SENDER)) { translate(buffer, SENDER); @@ -311,15 +463,13 @@ PidginConversation *gtkconv = (PidginConversation *)user_data; GtkWidget *box, *counter = 
NULL; gchar *markup = NULL; + guint count; g_return_if_fail(gtkconv != NULL); - guint count = gtk_text_buffer_get_char_count(textbuffer) + + count = gtk_text_buffer_get_char_count(textbuffer) + (unsigned int)g_utf8_strlen(new_text, -1); -// twitter_debug("new_text = %s utf8_strlen = %ld new_text_length = %d\n", -// new_text, g_utf8_strlen(new_text, -1), new_text_length); - markup = g_markup_printf_escaped("%u", count <= 140 ? "black" : "red", count); @@ -493,6 +643,7 @@ receiving_im_cb(PurpleAccount *account, char **sender, char **buffer, PurpleConversation *conv, PurpleMessageFlags *flags, void *data) { + twitter_debug("called\n"); twitter_debug("buffer = %s suppress_oops = %d\n", *buffer, suppress_oops); if(!suppress_oops || !purple_prefs_get_bool(OPT_SUPPRESS_OOPS)) @@ -524,21 +675,10 @@ plugin, PURPLE_CALLBACK(receiving_im_cb), NULL); /* compile regex */ - regp[RECIPIENT] = g_regex_new("@([A-Za-z0-9_]+)", 0, 0, NULL); - regp[SENDER] = g_regex_new("([A-Za-z0-9_]+): ", 0, 0, NULL); - regp[COMMAND] = - g_regex_new("^(?:\\s*)([dDfFgGlLmMnNtTwW]{1}\\s+[A-Za-z0-9_]+)(?:\\s*\\Z)", - G_REGEX_RAW, 0, NULL); - regp[PSEUDO] = - g_regex_new - ("^\\s*(?:[\"#$%&'()*+,\\-./:;<=>?\\[\\\\\\]_`{|}~]|[^\\s\\x21-\\x7E])*([dDfFgGlLmMnNtTwW]{1})(?:\\Z|\\s+|[^\\x21-\\x7E]+\\Z)", - G_REGEX_RAW, 0, NULL); - regp[EXCESS_MARKUP] = - g_regex_new - ("\\1", 0, 0, - NULL); - regp[FONT_MARKUP] = g_regex_new("(.+?)", 0, 0, NULL); - regp[ATMARK_AFTER_A] = g_regex_new("()@", 0, 0, NULL); + regp[RECIPIENT] = g_regex_new(P_RECIPIENT, 0, 0, NULL); + regp[SENDER] = g_regex_new(P_SENDER, 0, 0, NULL); + regp[COMMAND] = g_regex_new(P_COMMAND, G_REGEX_RAW, 0, NULL); + regp[PSEUDO] = g_regex_new(P_PSEUDO, G_REGEX_RAW, 0, NULL); /* attach counter to the existing twitter window */ gboolean enabled = purple_prefs_get_bool(OPT_COUNTER); @@ -573,9 +713,6 @@ g_regex_unref(regp[SENDER]); g_regex_unref(regp[COMMAND]); g_regex_unref(regp[PSEUDO]); - g_regex_unref(regp[EXCESS_MARKUP]); - 
g_regex_unref(regp[FONT_MARKUP]); - g_regex_unref(regp[ATMARK_AFTER_A]); /* detach from twitter window */ detach_from_window(); @@ -588,6 +725,7 @@ gconstpointer val, gpointer data) { gboolean enabled = purple_prefs_get_bool(OPT_COUNTER); + if(enabled) { attach_to_window(); }