changeset 336:8de140bc8d13

Better handling of non-ASCII tags. It now properly handles tags in which the first ASCII letter occurs at the beginning or in the middle of a non-ASCII word.
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Wed, 14 Oct 2009 14:55:55 +0900
parents 34ecd09ce4f8
children 9f78fb6bfc76
files main.c pidgin-twitter.h
diffstat 2 files changed, 2 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/main.c	Tue Oct 13 14:07:55 2009 +0900
+++ b/main.c	Wed Oct 14 14:55:55 2009 +0900
@@ -1244,7 +1244,7 @@
     regp[PSEUDO]    = g_regex_new(P_PSEUDO,  G_REGEX_RAW, 0, NULL);
     regp[USER]      = g_regex_new(P_USER, 0, 0, NULL);
     regp[CHANNEL_WASSR]  = g_regex_new(P_CHANNEL, 0, 0, NULL);
-    regp[TAG_TWITTER]    = g_regex_new(P_TAG_TWITTER, 0, 0, NULL);
+    regp[TAG_TWITTER]    = g_regex_new(P_TAG_TWITTER, G_REGEX_RAW, 0, NULL);
     regp[TAG_IDENTICA]   = g_regex_new(P_TAG_IDENTICA, 0, 0, NULL);
     regp[GROUP_IDENTICA] = g_regex_new(P_GROUP_IDENTICA, 0, 0, NULL);
     regp[IMAGE_TWITTER]  = g_regex_new(P_IMAGE_TWITTER, 0, 0, NULL);
--- a/pidgin-twitter.h	Tue Oct 13 14:07:55 2009 +0900
+++ b/pidgin-twitter.h	Wed Oct 14 14:55:55 2009 +0900
@@ -186,7 +186,7 @@
 #define P_PSEUDO            "^\\s*(?:[\"#$%&'()*+,\\-./:;<=>?\\[\\\\\\]_`{|}~]|[^\\s\\x21-\\x7E])*([dDfFgGlLmMnNtTwW]{1})(?:\\Z|\\s+|[^\\x21-\\x7E]+\\Z)"
 #define P_USER              "^.*?(?:<a .+?>)?([-A-Za-z0-9_]+)(?:</a>)?:"
 #define P_CHANNEL           "^(.*?(?:<a .+?>)?[-A-Za-z0-9_]+(?:</a>)?: \\r?\\n?#)([A-Za-z0-9_]+) "
-#define P_TAG_TWITTER       "(^|\\s+)#([-A-Za-z0-9_]+|\\S+_)"
+#define P_TAG_TWITTER       "(^|\\s+)#([^-A-Za-z0-9_]*[-A-Za-z0-9_]+\\S*)"
 #define P_TAG_IDENTICA      "#([-A-Za-z0-9_]+)"
 #define P_GROUP_IDENTICA    "!([-A-Za-z0-9_]+)"
 #define P_IMAGE_TWITTER     "<profile_image_url>(https?://.+?)</profile_image_url>"