Mercurial > pidgin
diff src/util.c @ 7095:c8bf2da398e3
[gaim-migrate @ 7660]
html.[ch] is gone. Everything inside was namespaced and put in util.[ch].
One less ugly part of gaim in the tree.
committer: Tailor Script <tailor@pidgin.im>
| field | value |
|---|---|
| author | Christian Hammond <chipx86@chipx86.com> |
| date | Wed, 01 Oct 2003 03:43:18 +0000 |
| parents | 2343c3aa1dec |
| children | 8130adad8b7a |
line wrap: on
line diff
--- a/src/util.c Wed Oct 01 03:01:25 2003 +0000 +++ b/src/util.c Wed Oct 01 03:43:18 2003 +0000 @@ -2,8 +2,9 @@ * @file util.h Utility Functions * @ingroup core * - * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net> - * Copyright (C) 2003 Christian Hammond <chipx86@gnupdate.org> + * Copyright (C) 1998-1999 Mark Spencer <markster@marko.net> + * 2003 Christian Hammond <chipx86@gnupdate.org> + * 2003 Nathan Walp <faceprint@faceprint.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1169,6 +1170,448 @@ } gboolean +gaim_markup_extract_info_field(const char *str, char *dest_buffer, + const char *start_token, int skip, + const char *end_token, char check_value, + const char *no_value_token, + const char *display_name, gboolean is_link, + const char *link_prefix) +{ + const char *p, *q; + char buf[1024]; + + g_return_val_if_fail(str != NULL, FALSE); + g_return_val_if_fail(dest_buffer != NULL, FALSE); + g_return_val_if_fail(start_token != NULL, FALSE); + g_return_val_if_fail(end_token != NULL, FALSE); + g_return_val_if_fail(display_name != NULL, FALSE); + + p = strstr(str, start_token); + + if (p == NULL) + return FALSE; + + p += strlen(start_token) + skip; + + if (check_value != '\0' && *p == check_value) + return FALSE; + + q = strstr(p, end_token); + + if (q != NULL && (!no_value_token || + (no_value_token && strncmp(p, no_value_token, + strlen(no_value_token))))) + { + strcat(dest_buffer, "<b>"); + strcat(dest_buffer, display_name); + strcat(dest_buffer, ":</b> "); + + if (is_link) + { + strcat(dest_buffer, "<br><a href=\""); + memcpy(buf, p, q - p); + buf[q - p] = '\0'; + + if (link_prefix) + strcat(dest_buffer, link_prefix); + + strcat(dest_buffer, buf); + strcat(dest_buffer, "\">"); + + if (link_prefix) + strcat(dest_buffer, link_prefix); + + strcat(dest_buffer, buf); + strcat(dest_buffer, "</a>"); + } + else + { + memcpy(buf, p, q - p); + buf[q - p] = 
'\0'; + strcat(dest_buffer, buf); + } + + strcat(dest_buffer, "<br>\n"); + + return TRUE; + } + + return FALSE; +} + +struct gaim_parse_tag { + char *src_tag; + char *dest_tag; +}; + +#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \ + const char *o = c + strlen("<" x); \ + const char *p = NULL, *q = NULL, *r = NULL; \ + GString *innards = g_string_new(""); \ + while(o && *o) { \ + if(!q && (*o == '\"' || *o == '\'') ) { \ + q = o; \ + } else if(q) { \ + if(*o == *q) { \ + char *unescaped = g_strndup(q+1, o-q-1); \ + char *escaped = g_markup_escape_text(unescaped, -1); \ + g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \ + g_free(unescaped); \ + g_free(escaped); \ + q = NULL; \ + } else if(*c == '\\') { \ + o++; \ + } \ + } else if(*o == '<') { \ + r = o; \ + } else if(*o == '>') { \ + p = o; \ + break; \ + } else { \ + innards = g_string_append_c(innards, *o); \ + } \ + o++; \ + } \ + if(p && !r) { \ + if(*(p-1) != '/') { \ + struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \ + pt->src_tag = x; \ + pt->dest_tag = y; \ + tags = g_list_prepend(tags, pt); \ + } \ + xhtml = g_string_append(xhtml, "<" y); \ + c += strlen("<" x ); \ + xhtml = g_string_append(xhtml, innards->str); \ + xhtml = g_string_append_c(xhtml, '>'); \ + c = p + 1; \ + } else { \ + xhtml = g_string_append(xhtml, "<"); \ + plain = g_string_append_c(plain, '<'); \ + c++; \ + } \ + g_string_free(innards, TRUE); \ + continue; \ + } \ + if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \ + (*(c+strlen("<" x)) == '>' || \ + !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \ + xhtml = g_string_append(xhtml, "<" y); \ + c += strlen("<" x); \ + if(*c != '/') { \ + struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \ + pt->src_tag = x; \ + pt->dest_tag = y; \ + tags = g_list_prepend(tags, pt); \ + xhtml = g_string_append_c(xhtml, '>'); \ + } else { \ + xhtml = g_string_append(xhtml, "/>");\ + } \ + c = strchr(c, '>') + 1; \ + 
continue; \ + } +#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x) +void +gaim_markup_html_to_xhtml(const char *html, char **xhtml_out, + char **plain_out) +{ + GString *xhtml = g_string_new(""); + GString *plain = g_string_new(""); + GList *tags = NULL, *tag; + const char *c = html; + + while(c && *c) { + if(*c == '<') { + if(*(c+1) == '/') { /* closing tag */ + tag = tags; + while(tag) { + struct gaim_parse_tag *pt = tag->data; + if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') { + c += strlen(pt->src_tag) + 3; + break; + } + tag = tag->next; + } + if(tag) { + while(tags) { + struct gaim_parse_tag *pt = tags->data; + g_string_append_printf(xhtml, "</%s>", pt->dest_tag); + if(tags == tag) + break; + tags = g_list_remove(tags, pt); + g_free(pt); + } + g_free(tag->data); + tags = g_list_remove(tags, tag->data); + } else { + /* we tried to close a tag we never opened! escape it + * and move on */ + xhtml = g_string_append(xhtml, "<"); + plain = g_string_append_c(plain, '<'); + c++; + } + } else { /* opening tag */ + ALLOW_TAG("a"); + ALLOW_TAG_ALT("b", "strong"); + ALLOW_TAG("blockquote"); + ALLOW_TAG_ALT("bold", "strong"); + ALLOW_TAG("cite"); + ALLOW_TAG("div"); + ALLOW_TAG("em"); + ALLOW_TAG("h1"); + ALLOW_TAG("h2"); + ALLOW_TAG("h3"); + ALLOW_TAG("h4"); + ALLOW_TAG("h5"); + ALLOW_TAG("h6"); + /* we only allow html to start the message */ + if(c == html) + ALLOW_TAG("html"); + ALLOW_TAG_ALT("i", "em"); + ALLOW_TAG_ALT("italic", "em"); + ALLOW_TAG("li"); + ALLOW_TAG("ol"); + ALLOW_TAG("p"); + ALLOW_TAG("pre"); + ALLOW_TAG("q"); + ALLOW_TAG("span"); + ALLOW_TAG("strong"); + ALLOW_TAG("ul"); + + /* we skip <HR> because it's not legal in XHTML-IM. However, + * we still want to send something sensible, so we put a + * linebreak in its place. <BR> also needs special handling + * because putting a </BR> to close it would just be dumb. 
*/ + if((!g_ascii_strncasecmp(c, "<br", 3) + || !g_ascii_strncasecmp(c, "<hr", 3)) + && (*(c+3) == '>' || + !g_ascii_strncasecmp(c+3, "/>", 2) || + !g_ascii_strncasecmp(c+3, " />", 3))) { + c = strchr(c, '>') + 1; + xhtml = g_string_append(xhtml, "<br/>"); + if(*c != '\n') + plain = g_string_append_c(plain, '\n'); + continue; + } + if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) { + struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); + pt->src_tag = *(c+2) == '>' ? "u" : "underline"; + pt->dest_tag = "span"; + tags = g_list_prepend(tags, pt); + c = strchr(c, '>') + 1; + xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>"); + continue; + } + if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) { + struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); + pt->src_tag = *(c+2) == '>' ? "s" : "strike"; + pt->dest_tag = "span"; + tags = g_list_prepend(tags, pt); + c = strchr(c, '>') + 1; + xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>"); + continue; + } + if(!g_ascii_strncasecmp(c, "<sub>", 5)) { + struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); + pt->src_tag = "sub"; + pt->dest_tag = "span"; + tags = g_list_prepend(tags, pt); + c = strchr(c, '>') + 1; + xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>"); + continue; + } + if(!g_ascii_strncasecmp(c, "<sup>", 5)) { + struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); + pt->src_tag = "sup"; + pt->dest_tag = "span"; + tags = g_list_prepend(tags, pt); + c = strchr(c, '>') + 1; + xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>"); + continue; + } + if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) { + const char *p = c; + GString *style = g_string_new(""); + struct gaim_parse_tag *pt; + while(*p && *p != '>') { + if(!g_ascii_strncasecmp(p, "color=", strlen("color="))) { 
+ const char *q = p + strlen("color="); + GString *color = g_string_new(""); + if(*q == '\'' || *q == '\"') + q++; + while(*q && *q != '\"' && *q != '\'' && *q != ' ') { + color = g_string_append_c(color, *q); + q++; + } + g_string_append_printf(style, "color: %s; ", color->str); + g_string_free(color, TRUE); + p = q; + } else if(!g_ascii_strncasecmp(p, "face=", strlen("face="))) { + const char *q = p + strlen("face="); + gboolean space_allowed = FALSE; + GString *face = g_string_new(""); + if(*q == '\'' || *q == '\"') { + space_allowed = TRUE; + q++; + } + while(*q && *q != '\"' && *q != '\'' && (space_allowed || *q != ' ')) { + face = g_string_append_c(face, *q); + q++; + } + g_string_append_printf(style, "font-family: %s; ", face->str); + g_string_free(face, TRUE); + p = q; + } else if(!g_ascii_strncasecmp(p, "size=", strlen("size="))) { + const char *q = p + strlen("size="); + int sz; + const char *size = "medium"; + if(*q == '\'' || *q == '\"') + q++; + sz = atoi(q); + if(sz < 3) + size = "smaller"; + else if(sz > 3) + size = "larger"; + g_string_append_printf(style, "font-size: %s; ", size); + p = q; + } + p++; + } + c = strchr(c, '>') + 1; + pt = g_new0(struct gaim_parse_tag, 1); + pt->src_tag = "font"; + pt->dest_tag = "span"; + tags = g_list_prepend(tags, pt); + xhtml = g_string_append(xhtml, "<span"); + if(style->len) + g_string_append_printf(xhtml, " style='%s'", style->str); + xhtml = g_string_append_c(xhtml, '>'); + g_string_free(style, TRUE); + continue; + } + if(!g_ascii_strncasecmp(c, "<body ", 6)) { + const char *p = c; + gboolean did_something = FALSE; + while(*p && *p != '>') { + if(!g_ascii_strncasecmp(p, "bgcolor=", strlen("bgcolor="))) { + const char *q = p + strlen("bgcolor="); + struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); + GString *color = g_string_new(""); + if(*q == '\'' || *q == '\"') + q++; + while(*q && *q != '\"' && *q != '\'' && *q != ' ') { + color = g_string_append_c(color, *q); + q++; + } + 
g_string_append_printf(xhtml, "<span style='background: %s;'>", color->str); + g_string_free(color, TRUE); + c = strchr(c, '>') + 1; + pt->src_tag = "body"; + pt->dest_tag = "span"; + tags = g_list_prepend(tags, pt); + did_something = TRUE; + break; + } + p++; + } + if(did_something) continue; + } + /* this has to come after the special case for bgcolor */ + ALLOW_TAG("body"); + if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) { + char *p = strstr(c + strlen("<!--"), "-->"); + if(p) { + xhtml = g_string_append(xhtml, "<!--"); + c += strlen("<!--"); + continue; + } + } + + xhtml = g_string_append(xhtml, "<"); + plain = g_string_append_c(plain, '<'); + c++; + } + } else { + xhtml = g_string_append_c(xhtml, *c); + plain = g_string_append_c(plain, *c); + c++; + } + } + tag = tags; + while(tag) { + g_string_append_printf(xhtml, "</%s>", (char *)tag->data); + tag = tag->next; + } + g_list_free(tags); + if(xhtml_out) + *xhtml_out = g_strdup(xhtml->str); + if(plain_out) + *plain_out = g_strdup(plain->str); + g_string_free(xhtml, TRUE); + g_string_free(plain, TRUE); +} + +char * +gaim_markup_strip_html(const char *str) +{ + int i, j, k; + gboolean visible = TRUE; + gchar *str2; + + g_return_val_if_fail(str != NULL, NULL); + + str2 = g_strdup(str); + + for (i = 0, j = 0; str2[i]; i++) + { + if (str2[i] == '<') + { + k = i + 1; + + if(g_ascii_isspace(str2[k])) + visible = TRUE; + else + { + while (str2[k]) + { + if (str2[k] == '<') + { + visible = TRUE; + break; + } + + if (str2[k] == '>') + { + visible = FALSE; + break; + } + + k++; + } + } + } + else if (str2[i] == '>' && !visible) + { + visible = TRUE; + continue; + } + + if (str2[i] == '&' && strncasecmp(str2 + i, """, 6) == 0) + { + str2[j++] = '\"'; + i = i + 5; + continue; + } + + if (visible) + str2[j++] = str2[i]; + } + + str2[j] = '\0'; + + return str2; +} + +gboolean gaim_url_parse(const char *url, char **ret_host, int *ret_port, char **ret_path) {