pidgin: src/util.c comparison

comparison src/util.c @ 9161:c3fa2ad099a2

[gaim-migrate @ 9946] wing added support for Yahoo profiles in, well, pretty much every language. Looks pretty impressive to me. Someone may want to double check his src/util.c changes. I think we have some crazy patch writers who know those functions better than me. This also introduces a couple of warnings because wing didn't add his new util.c function to util.h. Rather than adding it myself, I'm going to bug him to add it and document it. committer: Tailor Script <tailor@pidgin.im>

author	Tim Ringenbach <marv@pidgin.im>
date	Wed, 02 Jun 2004 00:44:51 +0000
parents	dabfa4184db8
children	456ef1f4ba19

comparison

equal deleted inserted replaced

-:dabfa4184db8
+:c3fa2ad099a2
 	if (check_value != '\0' && *p == check_value)
 		return FALSE;
 	q = strstr(p, end_token);
+	/* Trim leading blanks */
+	while (*p != '\n' && g_ascii_isspace(*p)) {
+		p += 1;
+	}
+	/* Trim trailing blanks */
+	while (q > p && g_ascii_isspace(*(q - 1))) {
+		q -= 1;
+	}
+	/* Don't bother with null strings */
+	if (p == q)
+		return FALSE;
 	if (q != NULL && (!no_value_token ||
 					  (no_value_token && strncmp(p, no_value_token,
 												 strlen(no_value_token)))))
 	{
 		*plain_out = g_strdup(plain->str);
 	g_string_free(xhtml, TRUE);
 	g_string_free(plain, TRUE);
 }
+/* The following are probably reasonable changes:
+ * - \n should be converted to a normal space
+ * - in addition to <br>, tags such as <p> and <div> should also be
+ *   converted into \n
+ * - </td>#whitespace<td> sequences should be turned into a single blank
+ * - </tr>#whitespace<tr> sequences should be turned into a single \n
+ * - <script>...</script> and similar elements should be removed entirely
+ *   (not done yet; this should be fixed some time)
+ */
 char *
 gaim_markup_strip_html(const char *str)
 {
 	int i, j, k;
 	gboolean visible = TRUE;
+	gboolean closing_td_p = FALSE;
 	gchar *str2;
 	if(!str)
 		return NULL;
 	for (i = 0, j = 0; str2[i]; i++)
 	{
 		if (str2[i] == '<')
 		{
-			if (strncasecmp(str2 + i, "<br>", 4) == 0)
+			if (strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
 			{
-				str2[j++] = '\n';
+				str2[j++] = ' ';
-				i = i + 3;
+				visible = TRUE;
-				continue;
+			}
+			else if (strncasecmp(str2 + i, "</td>", 5) == 0)
+			{
+				closing_td_p = TRUE;
+				visible = FALSE;
+			}
+			else
+			{
+				closing_td_p = FALSE;
+				visible = TRUE;
 			}
 			k = i + 1;
 			if(g_ascii_isspace(str2[k]))
 				visible = TRUE;
 			else
 			{
-				while (str2[k])
+				/* Scan until we end the tag either implicitly (closed start
+				 * tag) or explicitly, using a sloppy method (i.e., < or >
+				 * inside quoted attributes will screw us up)
+				 */
+				while (str2[k] && str2[k] != '<' && str2[k] != '>')
 				{
-					if (str2[k] == '<')
-					{
-						visible = TRUE;
-						break;
-					}
-					if (str2[k] == '>')
-					{
-						visible = FALSE;
-						break;
-					}
 					k++;
 				}
-			}
+				/* Check for tags which should be mapped to newline */
-		}
+				if (strncasecmp(str2 + i, "<p>", 3) == 0
-		else if (str2[i] == '>' && !visible)
+				 || strncasecmp(str2 + i, "<tr", 3) == 0
+				 || strncasecmp(str2 + i, "<br", 3) == 0
+				 || strncasecmp(str2 + i, "<li", 3) == 0
+				 || strncasecmp(str2 + i, "<div", 4) == 0
+				 || strncasecmp(str2 + i, "</table>", 8) == 0)
+				{
+					str2[j++] = '\n';
+				}
+				/* Update the index and continue checking after the tag */
+				i = (str2[k] == '<')? k - 1: k;
+				continue;
+			}
+		}
+		else if (!g_ascii_isspace(str2[i]))
 		{
 			visible = TRUE;
-			continue;
 		}
 		if (str2[i] == '&' && strncasecmp(str2 + i, "&quot;", 6) == 0)
 		{
 		    str2[j++] = '\"';
 			i = i + 3;
 			continue;
 		}
 		if (visible)
-			str2[j++] = str2[i];
+			str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
 	}
 	str2[j] = '\0';
 	return str2;
 	}
 	return(NULL);
 }
+/**
+ * Decodes decimal numeric character references (for example "&#8364;")
+ * found in a UTF-8 string into the characters they name.
+ *
+ * Only the decimal form "&#" digits ";" is recognized.  Anything else,
+ * including a reference without the terminating ';' or one that does not
+ * name a valid, non-NUL Unicode character, is copied through unchanged.
+ *
+ * @param in A NUL-terminated, valid UTF-8 string.
+ *
+ * @return A newly allocated string that the caller must g_free(), or NULL
+ *         if @a in is NULL or is not valid UTF-8.
+ */
+char *
+gaim_utf8_ncr_decode(const char *in)
+{
+	GString *out;
+	int i;
+
+	g_return_val_if_fail(in != NULL, NULL);
+	g_return_val_if_fail(g_utf8_validate(in, -1, NULL), NULL);
+
+	/* Allocate only after the argument checks so that an early return
+	 * cannot leak the buffer. */
+	out = g_string_new("");
+
+	for (i = 0; in[i]; i += 1) {
+		gboolean ncr_found_p = FALSE;
+
+		/* g_ascii_isdigit() is used instead of isdigit() because the
+		 * latter is undefined for negative char values, i.e. for the
+		 * bytes >= 0x80 that multi-byte UTF-8 sequences contain. */
+		if (in[i] == '&' && in[i + 1] == '#' && g_ascii_isdigit(in[i + 2])) {
+			gunichar wc = 0;
+			gboolean in_range = TRUE;
+			int j;
+
+			for (j = i + 2; g_ascii_isdigit(in[j]); j += 1) {
+				if (in_range) {
+					wc = wc * 10 + (gunichar)(in[j] - '0');
+					/* Stop accumulating once past the last Unicode
+					 * code point so long digit runs cannot wrap. */
+					if (wc > 0x10FFFF)
+						in_range = FALSE;
+				}
+			}
+
+			/* Technically not completely correct (original author's
+			 * note on requiring the ';' terminator).  A reference is
+			 * only decoded when it names a valid character; NUL is
+			 * rejected because it would truncate the C string. */
+			if (in[j] == ';' && in_range && wc != 0 &&
+				g_unichar_validate(wc)) {
+				g_string_append_unichar(out, wc);
+				i = j;
+				ncr_found_p = TRUE;
+			}
+		}
+
+		if (!ncr_found_p) {
+			g_string_append_c(out, in[i]);
+		}
+	}
+
+	/* Hand ownership of the character data to the caller. */
+	return g_string_free(out, FALSE);
+}
 int
 gaim_utf8_strcasecmp(const char *a, const char *b)
 {
 	char *a_norm = NULL;
 	char *b_norm = NULL;

Mercurial > pidgin

comparison src/util.c @ 9161:c3fa2ad099a2