changeset 9161:c3fa2ad099a2

[gaim-migrate @ 9946] wing added support for Yahoo profiles in, well, pretty much every language. Looks pretty impressive to me. Someone may want to double-check his src/util.c changes. I think we have some crazy patch writers who know those functions better than I do. This also introduces a couple of warnings because wing didn't add his new util.c function to util.h. Rather than adding it myself, I'm going to bug him to add it and document it. committer: Tailor Script <tailor@pidgin.im>
author Tim Ringenbach <marv@pidgin.im>
date Wed, 02 Jun 2004 00:44:51 +0000
parents dabfa4184db8
children 82aa3bc494de
files src/protocols/yahoo/Makefile.am src/protocols/yahoo/yahoo.c src/protocols/yahoo/yahoo.h src/util.c
diffstat 4 files changed, 92 insertions(+), 279 deletions(-) [+]
line wrap: on
line diff
--- a/src/protocols/yahoo/Makefile.am	Tue Jun 01 19:16:34 2004 +0000
+++ b/src/protocols/yahoo/Makefile.am	Wed Jun 02 00:44:51 2004 +0000
@@ -13,7 +13,8 @@
 	yahoochat.c \
 	util.c \
 	yahoo_filexfer.h \
-	yahoo_filexfer.c
+	yahoo_filexfer.c \
+	yahoo_profile.c
 
 AM_CFLAGS = $(st)
 
--- a/src/protocols/yahoo/yahoo.c	Tue Jun 01 19:16:34 2004 +0000
+++ b/src/protocols/yahoo/yahoo.c	Wed Jun 02 00:44:51 2004 +0000
@@ -43,13 +43,6 @@
 
 extern char *yahoo_crypt(const char *, const char *);
 
-typedef struct
-{
-	GaimConnection *gc;
-	char *name;
-} YahooGetInfoData;
-
-
 /* #define YAHOO_DEBUG */
 
 static void yahoo_add_buddy(GaimConnection *gc, const char *who, GaimGroup *);
@@ -3016,257 +3009,6 @@
 	return TRUE;
 }
 
-static void yahoo_got_info(void *data, const char *url_text, size_t len)
-{
-	YahooGetInfoData *info_data = (YahooGetInfoData *)data;
-	char *stripped, *p;
-	char buf[1024];
-	gboolean found = FALSE;
-	char *url_buffer;
-	GString *s;
-	int stripped_len;
-
-	gaim_debug_info("yahoo", "In yahoo_got_info\n");
-
-	/* we failed to grab the profile URL */
-	if (url_text == NULL || strcmp(url_text, "") == 0) {
-		gaim_notify_formatted(info_data->gc, NULL, _("Buddy Information"), NULL,
-			_("<html><body><b>Error retrieving profile</b></body></html>"),
-			  NULL, NULL);
-
-		g_free(info_data->name);
-		g_free(info_data);
-		return;
-	}
-
-	/* we don't yet support the multiple link level of the warning page for
-	 * 'adult' profiles, not to mention the fact that yahoo wants you to be
-	 * logged in (on the website) to be able to view an 'adult' profile.  for
-	 * now, just tell them that we can't help them, and provide a link to the
-	 * profile if they want to do the web browser thing.
-	 */
-	p = strstr(url_text, "Adult Profiles Warning Message");
-	if (p) {
-		g_snprintf(buf, 1024, "<html><body>%s%s<a href=\"%s%s\">%s%s</a></body></html>",
-				_("<b>Sorry, profiles marked as containing adult content are not supported at this time.</b><br><br>\n"),
-				_("If you wish to view this profile, you will need to visit this link in your web browser<br>"),
-				YAHOO_PROFILE_URL, info_data->name, YAHOO_PROFILE_URL, info_data->name);
-
-		gaim_notify_formatted(info_data->gc, NULL, _("Buddy Information"), NULL,
-				buf, NULL, NULL);
-
-		g_free(info_data->name);
-		g_free(info_data);
-		return;
-	}
-
-	/* at the moment we don't support profile pages with languages other than
-	 * english. the problem is, that every user may choose his/her own profile
-	 * language. this language has nothing to do with the preferences of the
-	 * user which looks at the profile
-	 */
-	p = strstr(url_text, "Last Updated:");
-	if (!p) {
-		p = strstr(url_text, "Last Updated&nbsp;");
-	}
-	if (!p) {
-		g_snprintf(buf, 1024, "<html><body>%s%s<a href=\"%s%s\">%s%s</a></body></html>",
-				_("<b>Sorry, non-English profiles are not supported at this time.</b><br><br>\n"),
-				_("If you wish to view this profile, you will need to visit this link in your web browser<br>"),
-				YAHOO_PROFILE_URL, info_data->name, YAHOO_PROFILE_URL, info_data->name);
-
-		gaim_notify_formatted(info_data->gc, NULL, _("Buddy Information"), NULL,
-				buf, NULL, NULL);
-
-		g_free(info_data->name);
-		g_free(info_data);
-		return;
-	}
-
-	url_buffer = g_strdup(url_text);
-
-	/*
-	 * gaim_markup_strip_html() doesn't strip out character entities like &nbsp;
-	 * and &#183;
-	*/
-	while ((p = strstr(url_buffer, "&nbsp;")) != NULL) {
-		memmove(p, p + 6, strlen(p + 6));
-		url_buffer[strlen(url_buffer) - 6] = '\0';
-	}
-	while ((p = strstr(url_buffer, "&#183;")) != NULL) {
-		memmove(p, p + 6, strlen(p + 6));
-		url_buffer[strlen(url_buffer) - 6] = '\0';
-	}
-
-	/* nuke the nasty \r's */
-	while ((p = strchr(url_buffer, '\r')) != NULL) {
-		memmove(p, p + 1, strlen(p + 1));
-		url_buffer[strlen(url_buffer) - 1] = '\0';
-	}
-
-	/* nuke the html, it's easier than trying to parse the horrid stuff */
-	stripped = gaim_markup_strip_html(url_buffer);
-	stripped_len = strlen(stripped);
-
-	gaim_debug_misc("yahoo", "stripped = %p\n", stripped);
-	gaim_debug_misc("yahoo", "url_buffer = %p\n", url_buffer);
-
-	/* gonna re-use the memory we've already got for url_buffer */
-	/* no we're not */
-	s = g_string_sized_new(strlen(url_buffer));
-	g_string_append(s, "<html><body>\n");
-
-	/* extract their Yahoo! ID and put it in. Don't bother marking has_info as
-	 * true, since the Yahoo! ID will always be there */
-	if (!gaim_markup_extract_info_field(stripped, stripped_len, s, "Yahoo! ID:", 2, "\n", 0,
-			NULL, _("Yahoo! ID"), 0, NULL))
-		g_string_append_printf(s, "<b>%s:</b> %s<br>", _("Yahoo! ID"), info_data->name);
-
-
-	/* extract their Email address and put it in */
-	found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "My Email", 5, "\n", 0,
-			"Private", _("Email"), 0, NULL);
-
-	/* extract the Nickname if it exists */
-	found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "Nickname:", 1, "\n", '\n',
-			NULL, _("Nickname"), 0, NULL);
-
-	/* extract their RealName and put it in */
-	found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "RealName:", 1, "\n", '\n',
-			NULL, _("Realname"), 0, NULL);
-
-	/* extract their Location and put it in */
-	found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "Location:", 2, "\n", '\n',
-			NULL, _("Location"), 0, NULL);
-
-	/* extract their Age and put it in */
-	found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "Age:", 3, "\n", '\n',
-			NULL, _("Age"), 0, NULL);
-
-	/* extract their MaritalStatus and put it in */
-	found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "MaritalStatus:", 3, "\n", '\n',
-			"No Answer", _("Marital Status"), 0, NULL);
-
-	/* extract their Gender and put it in */
-	found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "Gender:", 3, "\n", '\n',
-			"No Answer", _("Gender"), 0, NULL);
-
-	/* extract their Occupation and put it in */
-	found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "Occupation:", 2, "\n", '\n',
-			NULL, _("Occupation"), 0, NULL);
-
-	/* Hobbies, Latest News, and Favorite Quote are a bit different, since the
-	 * values can contain embedded newlines... but any or all of them can also
-	 * not appear.  The way we delimit them is to successively look for the next
-	 * one that _could_ appear, and if all else fails, we end the section by
-	 * looking for the 'Links' heading, which is the next thing to follow this
-	 * bunch.
-	 */
-
-	if (!gaim_markup_extract_info_field(stripped, stripped_len, s, "Hobbies:", 1, "Latest News",
-			'\n', NULL, _("Hobbies"), 0, NULL))
-	{
-		if (!gaim_markup_extract_info_field(stripped, stripped_len, s, "Hobbies:", 1, "Favorite Quote",
-				'\n', NULL, _("Hobbies"), 0, NULL))
-		{
-			found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "Hobbies:", 1, "Links",
-					'\n', NULL, _("Hobbies"), 0, NULL);
-		}
-		else
-			found = TRUE;
-	}
-	else
-		found = TRUE;
-
-	if (!gaim_markup_extract_info_field(stripped, stripped_len, s, "Latest News:", 1, "Favorite Quote",
-			'\n', NULL, _("Latest News"), 0, NULL))
-	{
-		found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "Latest News:", 1, "Links",
-				'\n', NULL, _("Latest News"), 0, NULL);
-	}
-	else
-		found = TRUE;
-
-	found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "Favorite Quote:", 0, "Links",
-			'\n', NULL, _("Favorite Quote"), 0, NULL);
-
-	/* Home Page will either be "No home page specified",
-	 * or "Home Page: " and a link. */
-	p = strstr(stripped, "No home page specified");
-	if(!p)
-	{
-		found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "Home Page:", 1, " ", 0, NULL,
-				_("Home Page"), 1, NULL);
-	}
-
-	/* Cool Link {1,2,3} is also different.  If "No cool link specified" exists,
-	 * then we have none.  If we have one however, we'll need to check and see if
-	 * we have a second one.  If we have a second one, we have to check to see if
-	 * we have a third one.
-	 */
-	p = strstr(stripped,"No cool link specified");
-	if (!p)
-	{
-		if (gaim_markup_extract_info_field(stripped, stripped_len, s, "Cool Link 1:", 1, " ", 0, NULL,
-				_("Cool Link 1"), 1, NULL))
-		{
-			found = TRUE;
-			if (gaim_markup_extract_info_field(stripped, stripped_len, s, "Cool Link 2:", 1, " ", 0, NULL,
-					_("Cool Link 2"), 1, NULL))
-							gaim_markup_extract_info_field(stripped, stripped_len, s, "Cool Link 3:", 1, " ", 0, NULL,
-						_("Cool Link 3"), 1, NULL);
-		}
-	}
-
-	/* see if Member Since is there, and if so, extract it. */
-	found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "Member Since:", 1, "Last Updated:",
-			'\n', NULL, _("Member Since"), 0, NULL);
-
-	/* extract the Last Updated date and put it in */
-	found |= gaim_markup_extract_info_field(stripped, stripped_len, s, "Last Updated:", 1, "\n", '\n', NULL,
-			_("Last Updated"), 0, NULL);
-
-	/* finish off the html */
-	g_string_append(s, "</body></html>\n");
-	g_free(stripped);
-
-	if(found)
-	{
-		/* show it to the user */
-		gaim_notify_formatted(info_data->gc, NULL, _("Buddy Information"), NULL,
-							  s->str, NULL, NULL);
-	}
-	else
-	{
-		char *primary;
-		primary = g_strdup_printf(_("User information for %s unavailable"), info_data->name);
-		gaim_notify_error(info_data->gc, NULL, primary,
-				_("The user's profile is empty."));
-		g_free(primary);
-	}
-
-	g_free(url_buffer);
-	g_string_free(s, TRUE);
-	g_free(info_data->name);
-	g_free(info_data);
-}
-
-static void yahoo_get_info(GaimConnection *gc, const char *name)
-{
-	YahooGetInfoData *data;
-	char *url;
-
-	data       = g_new0(YahooGetInfoData, 1);
-	data->gc   = gc;
-	data->name = g_strdup(name);
-
-	url = g_strdup_printf("%s%s", YAHOO_PROFILE_URL, name);
-
-	gaim_url_fetch(url, FALSE, NULL, FALSE, yahoo_got_info, data);
-
-	g_free(url);
-}
-
 static void yahoo_change_buddys_group(GaimConnection *gc, const char *who,
 				   const char *old_group, const char *new_group)
 {
--- a/src/protocols/yahoo/yahoo.h	Tue Jun 01 19:16:34 2004 +0000
+++ b/src/protocols/yahoo/yahoo.h	Wed Jun 02 00:44:51 2004 +0000
@@ -225,4 +225,7 @@
  */
 char *yahoo_string_decode(GaimConnection *gc, const char *str, gboolean utf8);
 
+/* yahoo_profile.c */
+void yahoo_get_info(GaimConnection *gc, const char *name);
+
 #endif /* _YAHOO_H_ */
--- a/src/util.c	Tue Jun 01 19:16:34 2004 +0000
+++ b/src/util.c	Wed Jun 02 00:44:51 2004 +0000
@@ -768,6 +768,20 @@
 
 	q = strstr(p, end_token);
 
+	/* Trim leading blanks */
+	while (*p != '\n' && g_ascii_isspace(*p)) {
+		p += 1;
+	}
+
+	/* Trim trailing blanks */
+	while (q > p && g_ascii_isspace(*(q - 1))) {
+		q -= 1;
+	}
+
+	/* Don't bother with null strings */
+	if (p == q)
+		return FALSE;
+
 	if (q != NULL && (!no_value_token ||
 					  (no_value_token && strncmp(p, no_value_token,
 												 strlen(no_value_token)))))
@@ -1208,11 +1222,20 @@
 	g_string_free(plain, TRUE);
 }
 
+/* The following are probably reasonable changes:
+ * - \n should be converted to a normal space
+ * - in addition to <br>, <p> and <div> etc. should also be converted into \n
+ * - We want to turn </td>#whitespace<td> sequences into a single blank
+ * - We want to turn </tr>#whitespace<tr> sequences into a single \n
+ * We should remove all <script>...</script> etc. This should be fixed some time
+ */
+
 char *
 gaim_markup_strip_html(const char *str)
 {
 	int i, j, k;
 	gboolean visible = TRUE;
+	gboolean closing_td_p = FALSE;
 	gchar *str2;
 
 	if(!str)
@@ -1224,11 +1247,20 @@
 	{
 		if (str2[i] == '<')
 		{
-			if (strncasecmp(str2 + i, "<br>", 4) == 0)
+			if (strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
+			{
+				str2[j++] = ' ';
+				visible = TRUE;
+			}
+			else if (strncasecmp(str2 + i, "</td>", 5) == 0)
 			{
-				str2[j++] = '\n';
-				i = i + 3;
-				continue;
+				closing_td_p = TRUE;
+				visible = FALSE;
+			}
+			else
+			{
+				closing_td_p = FALSE;
+				visible = TRUE;
 			}
 
 			k = i + 1;
@@ -1237,28 +1269,32 @@
 				visible = TRUE;
 			else
 			{
-				while (str2[k])
+				/* Scan until we end the tag either implicitly (closed start
+				 * tag) or explicitly, using a sloppy method (i.e., < or >
+				 * inside quoted attributes will screw us up)
+				 */
+				while (str2[k] && str2[k] != '<' && str2[k] != '>')
 				{
-					if (str2[k] == '<')
-					{
-						visible = TRUE;
-						break;
-					}
-
-					if (str2[k] == '>')
-					{
-						visible = FALSE;
-						break;
-					}
-
 					k++;
 				}
+				/* Check for tags which should be mapped to newline */
+				if (strncasecmp(str2 + i, "<p>", 3) == 0
+				 || strncasecmp(str2 + i, "<tr", 3) == 0
+				 || strncasecmp(str2 + i, "<br", 3) == 0
+				 || strncasecmp(str2 + i, "<li", 3) == 0
+				 || strncasecmp(str2 + i, "<div", 4) == 0
+				 || strncasecmp(str2 + i, "</table>", 8) == 0)
+				{
+					str2[j++] = '\n';
+				}
+				/* Update the index and continue checking after the tag */
+				i = (str2[k] == '<')? k - 1: k;
+				continue;
 			}
 		}
-		else if (str2[i] == '>' && !visible)
+		else if (!g_ascii_isspace(str2[i]))
 		{
 			visible = TRUE;
-			continue;
 		}
 
 		if (str2[i] == '&' && strncasecmp(str2 + i, "&quot;", 6) == 0)
@@ -1290,7 +1326,7 @@
 		}
 
 		if (visible)
-			str2[j++] = str2[i];
+			str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
 	}
 
 	str2[j] = '\0';
@@ -2671,6 +2707,37 @@
 	return(NULL);
 }
 
+/* Decode decimal NCRs ("&#NNN;") in UTF-8 text; free result with g_free(). */
+char *
+gaim_utf8_ncr_decode(const char *in)
+{
+	GString *out;
+	int i, j;
+
+	/* Validate before allocating so a rejected input cannot leak `out`. */
+	g_return_val_if_fail(in != NULL, NULL);
+	g_return_val_if_fail(g_utf8_validate(in, -1, NULL), NULL);
+
+	out = g_string_new("");
+	for (i = 0; in[i]; i += 1) {
+		gunichar wc = 0;
+		if (in[i] == '&' && in[i + 1] == '#') {
+			/* Stop growing past U+10FFFF so long digit runs cannot wrap. */
+			for (j = i + 2; g_ascii_isdigit(in[j]); j += 1)
+				wc = (wc > 0x10FFFF) ? wc : wc * 10 + (in[j] - '0');
+			/* Need a digit, ';', and a valid non-NUL code point. */
+			if (j > i + 2 && in[j] == ';' && wc != 0 && g_unichar_validate(wc)) {
+				g_string_append_unichar(out, wc);
+				i = j;
+				continue;
+			}
+		}
+		/* Not a usable NCR: copy the byte through unchanged. */
+		g_string_append_c(out, in[i]);
+	}
+	return g_string_free(out, FALSE);
+}
+
 int
 gaim_utf8_strcasecmp(const char *a, const char *b)
 {