# HG changeset patch # User Christian Hammond # Date 1064979840 0 # Node ID d88d79507276d826b582b53dfab8c8bb62765b54 # Parent c8bf2da398e3290f52ea284cf4ab83cb089ed281 [gaim-migrate @ 7661] And nuke html.[ch]. committer: Tailor Script diff -r c8bf2da398e3 -r d88d79507276 src/html.c --- a/src/html.c Wed Oct 01 03:43:18 2003 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,427 +0,0 @@ -/* - * gaim - * - * Copyright (C) 1998-1999, Mark Spencer - * 2003, Nathan Walp - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ -#include "internal.h" - -#include "debug.h" -#include "html.h" -#include "proxy.h" - -#include "gaim.h" - -gchar *strip_html(const gchar *text) -{ - int i, j, k; - int visible = 1; - gchar *text2 = g_strdup(text); - - if(!text) - return NULL; - - for (i = 0, j = 0; text2[i]; i++) { - if (text2[i] == '<') { - k = i + 1; - if(g_ascii_isspace(text2[k])) { - visible = 1; - } else { - while (text2[k]) { - if (text2[k] == '<') { - visible = 1; - break; - } - if (text2[k] == '>') { - visible = 0; - break; - } - k++; - } - } - } else if (text2[i] == '>' && !visible) { - visible = 1; - continue; - } - if (text2[i] == '&' && strncasecmp(text2+i,""",6) == 0) { - text2[j++] = '\"'; - i = i+5; - continue; - } - if (visible) { - text2[j++] = text2[i]; - } - } - text2[j] = '\0'; - return text2; -} - -struct gaim_parse_tag { - char *src_tag; - char *dest_tag; -}; - -#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \ - const char *o = c + strlen("<" x); \ - const char *p = NULL, *q = NULL, *r = NULL; \ - GString *innards = g_string_new(""); \ - while(o && *o) { \ - if(!q && (*o == '\"' || *o == '\'') ) { \ - q = o; \ - } else if(q) { \ - if(*o == *q) { \ - char *unescaped = g_strndup(q+1, o-q-1); \ - char *escaped = g_markup_escape_text(unescaped, -1); \ - g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \ - g_free(unescaped); \ - g_free(escaped); \ - q = NULL; \ - } else if(*c == '\\') { \ - o++; \ - } \ - } else if(*o == '<') { \ - r = o; \ - } else if(*o == '>') { \ - p = o; \ - break; \ - } else { \ - innards = g_string_append_c(innards, *o); \ - } \ - o++; \ - } \ - if(p && !r) { \ - if(*(p-1) != '/') { \ - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \ - pt->src_tag = x; \ - pt->dest_tag = y; \ - tags = g_list_prepend(tags, pt); \ - } \ - xhtml = g_string_append(xhtml, "<" y); \ - c += strlen("<" x ); \ - xhtml = g_string_append(xhtml, innards->str); \ - xhtml = g_string_append_c(xhtml, '>'); \ - c = p + 1; \ - } else { \ - xhtml = g_string_append(xhtml, "<"); \ - plain = g_string_append_c(plain, '<'); \ - c++; \ - } \ - g_string_free(innards, TRUE); \ - continue; \ - } \ - if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \ - (*(c+strlen("<" x)) == '>' || \ - !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \ - xhtml = g_string_append(xhtml, "<" y); \ - c += strlen("<" x); \ - if(*c != '/') { \ - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \ - pt->src_tag = x; \ - pt->dest_tag = y; \ - tags = g_list_prepend(tags, pt); \ - xhtml = g_string_append_c(xhtml, '>'); \ - } else { \ - xhtml = g_string_append(xhtml, "/>");\ - } \ - c = strchr(c, '>') + 1; \ - continue; \ - } -#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x) - -void html_to_xhtml(const char *html, char **xhtml_out, char **plain_out) { - GString *xhtml = g_string_new(""); - GString *plain = g_string_new(""); - GList *tags = NULL, *tag; - const char *c = html; - - while(c && *c) { - if(*c == '<') { - if(*(c+1) == '/') { /* closing tag */ - tag = tags; - while(tag) { - struct gaim_parse_tag *pt = tag->data; - if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') { - c += strlen(pt->src_tag) + 3; - break; - } - tag = tag->next; - } - if(tag) { - while(tags) { - struct gaim_parse_tag *pt = tags->data; - g_string_append_printf(xhtml, "", pt->dest_tag); - if(tags == tag) - break; - tags = g_list_remove(tags, pt); - g_free(pt); - } - g_free(tag->data); - tags = g_list_remove(tags, tag->data); - } else { - /* we tried to close a tag we never opened! escape it - * and move on */ - xhtml = g_string_append(xhtml, "<"); - plain = g_string_append_c(plain, '<'); - c++; - } - } else { /* opening tag */ - ALLOW_TAG("a"); - ALLOW_TAG_ALT("b", "strong"); - ALLOW_TAG("blockquote"); - ALLOW_TAG_ALT("bold", "strong"); - ALLOW_TAG("cite"); - ALLOW_TAG("div"); - ALLOW_TAG("em"); - ALLOW_TAG("h1"); - ALLOW_TAG("h2"); - ALLOW_TAG("h3"); - ALLOW_TAG("h4"); - ALLOW_TAG("h5"); - ALLOW_TAG("h6"); - /* we only allow html to start the message */ - if(c == html) - ALLOW_TAG("html"); - ALLOW_TAG_ALT("i", "em"); - ALLOW_TAG_ALT("italic", "em"); - ALLOW_TAG("li"); - ALLOW_TAG("ol"); - ALLOW_TAG("p"); - ALLOW_TAG("pre"); - ALLOW_TAG("q"); - ALLOW_TAG("span"); - ALLOW_TAG("strong"); - ALLOW_TAG("ul"); - - /* we skip
because it's not legal in XHTML-IM. However, - * we still want to send something sensible, so we put a - * linebreak in its place.
also needs special handling - * because putting a
to close it would just be dumb. */ - if((!g_ascii_strncasecmp(c, "' || - !g_ascii_strncasecmp(c+3, "/>", 2) || - !g_ascii_strncasecmp(c+3, " />", 3))) { - c = strchr(c, '>') + 1; - xhtml = g_string_append(xhtml, "
"); - if(*c != '\n') - plain = g_string_append_c(plain, '\n'); - continue; - } - if(!g_ascii_strncasecmp(c, "", 3) || !g_ascii_strncasecmp(c, "", strlen(""))) { - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); - pt->src_tag = *(c+2) == '>' ? "u" : "underline"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - xhtml = g_string_append(xhtml, ""); - continue; - } - if(!g_ascii_strncasecmp(c, "", 3) || !g_ascii_strncasecmp(c, "", strlen(""))) { - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); - pt->src_tag = *(c+2) == '>' ? "s" : "strike"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - xhtml = g_string_append(xhtml, ""); - continue; - } - if(!g_ascii_strncasecmp(c, "", 5)) { - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); - pt->src_tag = "sub"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - xhtml = g_string_append(xhtml, ""); - continue; - } - if(!g_ascii_strncasecmp(c, "", 5)) { - struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); - pt->src_tag = "sup"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - c = strchr(c, '>') + 1; - xhtml = g_string_append(xhtml, ""); - continue; - } - if(!g_ascii_strncasecmp(c, "' || *(c+5) == ' ')) { - const char *p = c; - GString *style = g_string_new(""); - struct gaim_parse_tag *pt; - while(*p && *p != '>') { - if(!g_ascii_strncasecmp(p, "color=", strlen("color="))) { - const char *q = p + strlen("color="); - GString *color = g_string_new(""); - if(*q == '\'' || *q == '\"') - q++; - while(*q && *q != '\"' && *q != '\'' && *q != ' ') { - color = g_string_append_c(color, *q); - q++; - } - g_string_append_printf(style, "color: %s; ", color->str); - g_string_free(color, TRUE); - p = q; - } else if(!g_ascii_strncasecmp(p, "face=", strlen("face="))) { - const char *q = p + strlen("face="); - gboolean space_allowed = FALSE; - GString *face = g_string_new(""); - if(*q == '\'' || *q == '\"') { - space_allowed = TRUE; - q++; - } - while(*q && *q != '\"' && *q != '\'' && (space_allowed || *q != ' ')) { - face = g_string_append_c(face, *q); - q++; - } - g_string_append_printf(style, "font-family: %s; ", face->str); - g_string_free(face, TRUE); - p = q; - } else if(!g_ascii_strncasecmp(p, "size=", strlen("size="))) { - const char *q = p + strlen("size="); - int sz; - const char *size = "medium"; - if(*q == '\'' || *q == '\"') - q++; - sz = atoi(q); - if(sz < 3) - size = "smaller"; - else if(sz > 3) - size = "larger"; - g_string_append_printf(style, "font-size: %s; ", size); - p = q; - } - p++; - } - c = strchr(c, '>') + 1; - pt = g_new0(struct gaim_parse_tag, 1); - pt->src_tag = "font"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - xhtml = g_string_append(xhtml, "len) - g_string_append_printf(xhtml, " style='%s'", style->str); - xhtml = g_string_append_c(xhtml, '>'); - g_string_free(style, TRUE); - continue; - } - if(!g_ascii_strncasecmp(c, "", color->str); - g_string_free(color, TRUE); - c = strchr(c, '>') + 1; - pt->src_tag = "body"; - pt->dest_tag = "span"; - tags = g_list_prepend(tags, pt); - did_something = TRUE; - break; - } - p++; - } - if(did_something) continue; - } - /* this has to come after the special case for bgcolor */ - ALLOW_TAG("body"); - if(!g_ascii_strncasecmp(c, ""); - if(p) { - xhtml = g_string_append(xhtml, "