Mercurial > pidgin.yaz
diff src/html.c @ 5093:89c0c811befa
[gaim-migrate @ 5455]
jabber XHTML support. since people tend to not like to write valid XHTML
all of the time, we now have html_to_xhtml() which does its best to figure
out what you meant. i'm tired, hope this works for everyone
committer: Tailor Script <tailor@pidgin.im>
author | Nathan Walp <nwalp@pidgin.im> |
---|---|
date | Thu, 10 Apr 2003 06:09:26 +0000 |
parents | e23a7e166680 |
children | a7e9036cd46f |
line wrap: on
line diff
--- a/src/html.c Thu Apr 10 00:57:06 2003 +0000 +++ b/src/html.c Thu Apr 10 06:09:26 2003 +0000 @@ -322,3 +322,138 @@ callback(data, g_strdup(_("g003: Error opening connection.\n")), 0); } } + +#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \ + char *o = strchr(c+1, '<'); \ + char *p = strchr(c+1, '>'); \ + if(p && (!o || p < o)) { \ + if(*(p-1) != '/') \ + tags = g_list_prepend(tags, y); \ + xhtml = g_string_append(xhtml, "<" y); \ + c += strlen("<" x ); \ + xhtml = g_string_append_len(xhtml, c, (p - c) + 1); \ + c = p + 1; \ + } else { \ + xhtml = g_string_append(xhtml, "<"); \ + } \ + continue; \ + } \ + if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \ + (*(c+strlen("<" x)) == '>' || \ + !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \ + xhtml = g_string_append(xhtml, "<" y); \ + c += strlen("<" x); \ + if(*c != '/') \ + tags = g_list_prepend(tags, y); \ + continue; \ + } +#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x) + +char *html_to_xhtml(const char *html) { + GString *xhtml = g_string_new(""); + GList *tags = NULL, *tag; + const char *q = NULL, *c = html; + char *ret; + while(*c) { + if(!q && (*c == '\"' || *c == '\'')) { + q = c; + xhtml = g_string_append_c(xhtml, *c); + c++; + } else if(q) { + if(*c == *q) { + q = NULL; + } else if(*c == '\\') { + xhtml = g_string_append_c(xhtml, *c); + c++; + } + xhtml = g_string_append_c(xhtml, *c); + c++; + } else if(*c == '<') { + if(*(c+1) == '/') { /* closing tag */ + tag = tags; + while(tag) { + if(!g_ascii_strncasecmp((c+2), tag->data, strlen(tag->data)) && *(c+strlen(tag->data)+2) == '>') { + c += strlen(tag->data) + 3; + break; + } + tag = tag->next; + } + if(tag) { + while(tags) { + g_string_append_printf(xhtml, "</%s>", (char *)tags->data); + if(tags == tag) + break; + tags = g_list_remove(tags, tags->data); + } + tags = g_list_remove(tags, tag->data); + } else { + /* we tried to close a tag we never opened! escape it + * and move on */ + xhtml = g_string_append(xhtml, "<"); + c++; + } + } else { /* opening tag */ + ALLOW_TAG("a"); + ALLOW_TAG("b"); + ALLOW_TAG("blockquote"); + ALLOW_TAG("body"); + ALLOW_TAG_ALT("bold", "b"); + ALLOW_TAG("br"); + ALLOW_TAG("cite"); + ALLOW_TAG("div"); + ALLOW_TAG("em"); + ALLOW_TAG("font"); + ALLOW_TAG("h1"); + ALLOW_TAG("h2"); + ALLOW_TAG("h3"); + ALLOW_TAG("h4"); + ALLOW_TAG("h5"); + ALLOW_TAG("h6"); + ALLOW_TAG("head"); + ALLOW_TAG("hr"); + ALLOW_TAG("html"); + ALLOW_TAG("i"); + ALLOW_TAG_ALT("italic", "i"); + ALLOW_TAG("li"); + ALLOW_TAG("ol"); + ALLOW_TAG("p"); + ALLOW_TAG("pre"); + ALLOW_TAG("q"); + ALLOW_TAG_ALT("s", "strike"); + ALLOW_TAG("span"); + ALLOW_TAG("strike"); + ALLOW_TAG("strong"); + ALLOW_TAG("sub"); + ALLOW_TAG("sup"); + ALLOW_TAG("title"); + ALLOW_TAG("u"); + ALLOW_TAG_ALT("underline","u"); + ALLOW_TAG("ul"); + + if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) { + char *p = strstr(c + strlen("<!--"), "-->"); + if(p) { + xhtml = g_string_append(xhtml, "<!--"); + c += strlen("<!--"); + continue; + } + } + + xhtml = g_string_append(xhtml, "<"); + c++; + } + } else { + xhtml = g_string_append_c(xhtml, *c); + c++; + } + } + tag = tags; + while(tag) { + g_string_append_printf(xhtml, "</%s>", (char *)tag->data); + tag = tag->next; + } + g_list_free(tags); + ret = g_strdup(xhtml->str); + g_string_free(xhtml, TRUE); + return ret; +}