Mercurial > pidgin
view src/util.c @ 7103:5dc4ed905a1a
[gaim-migrate @ 7668]
this caused problems
committer: Tailor Script <tailor@pidgin.im>
author | Nathan Walp <nwalp@pidgin.im> |
---|---|
date | Wed, 01 Oct 2003 05:31:28 +0000 |
parents | 8130adad8b7a |
children | 9d0e74b6ca68 |
line wrap: on
line source
/* * @file util.h Utility Functions * @ingroup core * * Copyright (C) 1998-1999 Mark Spencer <markster@marko.net> * 2003 Christian Hammond <chipx86@gnupdate.org> * 2003 Nathan Walp <faceprint@faceprint.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "internal.h" #include "conversation.h" #include "debug.h" #include "prpl.h" #include "prefs.h" #include "util.h" typedef struct { void (*callback)(void *, const char *, size_t); void *user_data; struct { char *address; int port; char *page; } website; char *url; gboolean full; char *user_agent; gboolean http11; int inpa; gboolean sentreq; gboolean newline; gboolean startsaving; char *webdata; unsigned long len; unsigned long data_len; } GaimFetchUrlData; static char home_dir[MAXPATHLEN]; char *full_date() { char *date; time_t tme; time(&tme); date = ctime(&tme); date[strlen(date) - 1] = '\0'; return date; } G_GNUC_CONST static gint badchar(char c) { switch (c) { case ' ': case ',': case '(': case ')': case '\0': case '\n': case '<': case '>': case '"': case '\'': return 1; default: return 0; } } gchar *sec_to_text(guint sec) { guint daze, hrs, min; char *ret = NULL; daze = sec / (60 * 60 * 24); hrs = (sec % (60 * 60 * 24)) / (60 * 60); min = (sec % (60 * 60)) / 60; sec = min % 60; if (daze) { if (hrs || min) { if (hrs) { if (min) { ret = g_strdup_printf( "%d %s, %d %s, %d %s.", daze, ngettext("day","days",daze), hrs, ngettext("hour","hours",hrs), min, ngettext("minute","minutes",min)); } else { ret = g_strdup_printf( "%d %s, %d %s.", daze, ngettext("day","days",daze), hrs, ngettext("hour","hours",hrs)); } } else { ret = g_strdup_printf( "%d %s, %d %s.", daze, ngettext("day","days",daze), min, ngettext("minute","minutes",min)); } } else ret = g_strdup_printf("%d %s.", daze, ngettext("day","days",daze)); } else { if (hrs) { if (min) { ret = g_strdup_printf( "%d %s, %d %s.", hrs, ngettext("hour","hours",hrs), min, ngettext("minute","minutes",min)); } else { ret = g_strdup_printf("%d %s.", hrs, ngettext("hour","hours",hrs)); } } else { ret = g_strdup_printf("%d %s.", min, ngettext("minute","minutes",min)); } } return ret; } char *linkify_text(const char *text) { const char *c, *t, *q = NULL; char *tmp; char url_buf[BUF_LEN * 4]; GString *ret = g_string_new(""); /* Assumes you have a buffer able to cary at least BUF_LEN * 2 bytes */ c = text; while (*c) { if(!q && (*c == '\"' || *c == '\'')) { q = c; } else if(q) { if(*c == *q) q = NULL; } else if (!g_ascii_strncasecmp(c, "<A", 2)) { while (1) { if (!g_ascii_strncasecmp(c, "/A>", 3)) { break; } ret = g_string_append_c(ret, *c); c++; if (!(*c)) break; } } else if ((*c=='h') && (!g_ascii_strncasecmp(c, "http://", 7) || (!g_ascii_strncasecmp(c, "https://", 8)))) { t = c; while (1) { if (badchar(*t)) { if (*(t) == ',' && (*(t + 1) != ' ')) { t++; continue; } if (*(t - 1) == '.') t--; strncpy(url_buf, c, t - c); url_buf[t - c] = 0; g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", url_buf, url_buf); c = t; break; } if (!t) break; t++; } } else if (!g_ascii_strncasecmp(c, "www.", 4)) { if (c[4] != '.') { t = c; while (1) { if (badchar(*t)) { if (t - c == 4) { break; } if (*(t) == ',' && (*(t + 1) != ' ')) { t++; continue; } if (*(t - 1) == '.') t--; strncpy(url_buf, c, t - c); url_buf[t - c] = 0; g_string_append_printf(ret, "<A HREF=\"http://%s\">%s</A>", url_buf, url_buf); c = t; break; } if (!t) break; t++; } } } else if (!g_ascii_strncasecmp(c, "ftp://", 6)) { t = c; while (1) { if (badchar(*t)) { if (*(t - 1) == '.') t--; strncpy(url_buf, c, t - c); url_buf[t - c] = 0; g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", url_buf, url_buf); c = t; break; } if (!t) break; t++; } } else if (!g_ascii_strncasecmp(c, "ftp.", 4)) { if (c[4] != '.') { t = c; while (1) { if (badchar(*t)) { if (t - c == 4) { break; } if (*(t - 1) == '.') t--; strncpy(url_buf, c, t - c); url_buf[t - c] = 0; g_string_append_printf(ret, "<A HREF=\"ftp://%s\">%s</A>", url_buf, url_buf); c = t; break; } if (!t) break; t++; } } } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) { t = c; while (1) { if (badchar(*t)) { if (*(t - 1) == '.') t--; strncpy(url_buf, c, t - c); url_buf[t - c] = 0; g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", url_buf, url_buf); c = t; break; } if (!t) break; t++; } } else if (c != text && (*c == '@')) { char *tmp; int flag; int len = 0; const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0"; url_buf[0] = 0; if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1))) flag = 0; else flag = 1; t = c; while (flag) { if (badchar(*t)) { ret = g_string_truncate(ret, ret->len - (len - 1)); break; } else { len++; tmp = g_malloc(len + 1); tmp[len] = 0; tmp[0] = *t; strncpy(tmp + 1, url_buf, len - 1); strcpy(url_buf, tmp); url_buf[len] = 0; g_free(tmp); t--; if (t < text) { ret = g_string_assign(ret, ""); break; } } } t = c + 1; while (flag) { if (badchar(*t)) { char *d; for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--) *d = '\0'; g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>", url_buf, url_buf); c = t; break; } else { strncat(url_buf, t, 1); len++; url_buf[len] = 0; } t++; } } if (*c == 0) break; ret = g_string_append_c(ret, *c); c++; } tmp = ret->str; g_string_free(ret, FALSE); return tmp; } static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" "0123456789+/"; char *tobase64(const unsigned char *in, size_t inlen) { char *out, *rv; rv = out = g_malloc((4 * (inlen + 1)) / 3 + 1); for (; inlen >= 3; inlen -= 3) { *out++ = alphabet[in[0] >> 2]; *out++ = alphabet[((in[0] << 4) & 0x30) | (in[1] >> 4)]; *out++ = alphabet[((in[1] << 2) & 0x3c) | (in[2] >> 6)]; *out++ = alphabet[in[2] & 0x3f]; in += 3; } if (inlen > 0) { unsigned char fragment; *out++ = alphabet[in[0] >> 2]; fragment = (in[0] << 4) & 0x30; if (inlen > 1) fragment |= in[1] >> 4; *out++ = alphabet[fragment]; *out++ = (inlen < 2) ? '=' : alphabet[(in[1] << 2) & 0x3c]; *out++ = '='; } *out = '\0'; return rv; } void frombase64(const char *text, char **data, int *size) { char *out = NULL; char tmp = 0; const char *c; gint32 tmp2 = 0; int len = 0, n = 0; if (!text || !data) return; c = text; while (*c) { if (*c >= 'A' && *c <= 'Z') { tmp = *c - 'A'; } else if (*c >= 'a' && *c <= 'z') { tmp = 26 + (*c - 'a'); } else if (*c >= '0' && *c <= 57) { tmp = 52 + (*c - '0'); } else if (*c == '+') { tmp = 62; } else if (*c == '/') { tmp = 63; } else if (*c == '\r' || *c == '\n') { c++; continue; } else if (*c == '=') { if (n == 3) { out = g_realloc(out, len + 2); out[len] = (char)(tmp2 >> 10) & 0xff; len++; out[len] = (char)(tmp2 >> 2) & 0xff; len++; } else if (n == 2) { out = g_realloc(out, len + 1); out[len] = (char)(tmp2 >> 4) & 0xff; len++; } break; } tmp2 = ((tmp2 << 6) | (tmp & 0xff)); n++; if (n == 4) { out = g_realloc(out, len + 3); out[len] = (char)((tmp2 >> 16) & 0xff); len++; out[len] = (char)((tmp2 >> 8) & 0xff); len++; out[len] = (char)(tmp2 & 0xff); len++; tmp2 = 0; n = 0; } c++; } out = g_realloc(out, len + 1); out[len] = 0; *data = out; if (size) *size = len; } /* * Converts raw data to a pretty, null-terminated base16 string. */ unsigned char *tobase16(const unsigned char *data, int length) { int i; unsigned char *ascii = NULL; if (!data || !length) return NULL; ascii = g_malloc(length*2 + 1); for (i=0; i<length; i++) snprintf(&ascii[i*2], 3, "%02hhx", data[i]); return ascii; } /* * Converts a null-terminated string of hexidecimal to raw data. */ int frombase16(const char *ascii, unsigned char **raw) { int len, i, accumulator=0; unsigned char *data; if (!ascii || !(len = strlen(ascii)) || (len % 2)) return 0; data = g_malloc(len/2); for (i=0; i<len; i++) { if (!(i % 2)) accumulator = 0; else accumulator = accumulator << 4; if (isdigit(ascii[i])) accumulator |= ascii[i]-48; else switch(ascii[i]) { case 'a': case 'A': accumulator|=10; break; case 'b': case 'B': accumulator|=11; break; case 'c': case 'C': accumulator|=12; break; case 'd': case 'D': accumulator|=13; break; case 'e': case 'E': accumulator|=14; break; case 'f': case 'F': accumulator|=15; break; } if (i % 2) data[(i-1)/2] = accumulator; } *raw = data; return len/2; } char *normalize(const char *s) { static char buf[BUF_LEN]; char *tmp; int i, j; g_return_val_if_fail((s != NULL), NULL); strncpy(buf, s, BUF_LEN); for (i=0, j=0; buf[j]; i++, j++) { while (buf[j] == ' ') j++; buf[i] = buf[j]; } buf[i] = '\0'; tmp = g_utf8_strdown(buf, -1); g_snprintf(buf, sizeof(buf), "%s", tmp); g_free(tmp); tmp = g_utf8_normalize(buf, -1, G_NORMALIZE_DEFAULT); g_snprintf(buf, sizeof(buf), "%s", tmp); g_free(tmp); return buf; } char *date() { static char date[80]; time_t tme; time(&tme); strftime(date, sizeof(date), "%H:%M:%S", localtime(&tme)); return date; } void clean_pid(void) { #ifndef _WIN32 int status; pid_t pid; do { pid = waitpid(-1, &status, WNOHANG); } while (pid != 0 && pid != (pid_t)-1); if(pid == (pid_t)-1 && errno != ECHILD) { char errmsg[BUFSIZ]; snprintf(errmsg, BUFSIZ, "Warning: waitpid() returned %d", pid); perror(errmsg); } #endif } /* Look for %n, %d, or %t in msg, and replace with the sender's name, date, or time */ char *away_subs(const char *msg, const char *name) { char *c; static char cpy[BUF_LONG]; int cnt = 0; time_t t = time(0); struct tm *tme = localtime(&t); char tmp[20]; cpy[0] = '\0'; c = (char *)msg; while (*c) { switch (*c) { case '%': if (*(c + 1)) { switch (*(c + 1)) { case 'n': /* append name */ strcpy(cpy + cnt, name); cnt += strlen(name); c++; break; case 'd': /* append date */ strftime(tmp, 20, "%m/%d/%Y", tme); strcpy(cpy + cnt, tmp); cnt += strlen(tmp); c++; break; case 't': /* append time */ strftime(tmp, 20, "%r", tme); strcpy(cpy + cnt, tmp); cnt += strlen(tmp); c++; break; default: cpy[cnt++] = *c; } } break; default: cpy[cnt++] = *c; } c++; } cpy[cnt] = '\0'; return (cpy); } GSList *message_split(char *message, int limit) { static GSList *ret = NULL; int lastgood = 0, curgood = 0, curpos = 0, len = strlen(message); gboolean intag = FALSE; if (ret) { GSList *tmp = ret; while (tmp) { g_free(tmp->data); tmp = g_slist_remove(tmp, tmp->data); } ret = NULL; } while (TRUE) { if (lastgood >= len) return ret; if (len - lastgood < limit) { ret = g_slist_append(ret, g_strdup(&message[lastgood])); return ret; } curgood = curpos = 0; intag = FALSE; while (curpos <= limit) { if (isspace(message[curpos + lastgood]) && !intag) curgood = curpos; if (message[curpos + lastgood] == '<') intag = TRUE; if (message[curpos + lastgood] == '>') intag = FALSE; curpos++; } if (curgood) { ret = g_slist_append(ret, g_strndup(&message[lastgood], curgood)); if (isspace(message[curgood + lastgood])) lastgood += curgood + 1; else lastgood += curgood; } else { /* whoops, guess we have to fudge it here */ ret = g_slist_append(ret, g_strndup(&message[lastgood], limit)); lastgood += limit; } } } const gchar *gaim_home_dir() { #ifndef _WIN32 if(g_get_home_dir()) return g_get_home_dir(); else return NULL; #else return wgaim_data_dir(); #endif } /* returns a string of the form ~/.gaim, where ~ is replaced by the user's home * dir. Note that there is no trailing slash after .gaim. */ gchar *gaim_user_dir() { const gchar *hd = gaim_home_dir(); if(hd) { strcpy( (char*)&home_dir, hd ); strcat( (char*)&home_dir, G_DIR_SEPARATOR_S ".gaim" ); return (gchar*)&home_dir; } else { return NULL; } } /* * rcg10312000 This could be more robust, but it works for my current * goal: to remove those annoying <BR> tags. :) * dtf12162000 made the loop more readable. i am a neat freak. ;) */ void strncpy_nohtml(gchar *dest, const gchar *src, size_t destsize) { gchar *ptr; g_snprintf(dest, destsize, "%s", src); while ((ptr = strstr(dest, "<BR>")) != NULL) { /* replace <BR> with a newline. */ *ptr = '\n'; memmove(ptr + 1, ptr + 4, strlen(ptr + 4) + 1); } } void strncpy_withhtml(gchar *dest, const gchar *src, size_t destsize) { gchar *end = dest + destsize; while (dest < end) { if (*src == '\n' && dest < end - 5) { strcpy(dest, "<BR>"); src++; dest += 4; } else if(*src == '\r') { src++; } else { *dest++ = *src; if (*src == '\0') return; else src++; } } } /* * Like strncpy_withhtml (above), but malloc()'s the necessary space * * The caller is responsible for freeing the space pointed to by the * return value. */ gchar *strdup_withhtml(const gchar *src) { gchar *sp, *dest; gulong destsize; if(!src) return NULL; /* * All we need do is multiply the number of newlines by 3 (the * additional length of "<BR>" over "\n"), account for the * terminator, malloc the space and call strncpy_withhtml. */ for(destsize = 0, sp = (gchar *)src; (sp = strchr(sp, '\n')) != NULL; ++sp, ++destsize) ; destsize *= 3; destsize += strlen(src) + 1; dest = g_malloc(destsize); strncpy_withhtml(dest, src, destsize); return(dest); } void strip_linefeed(gchar *text) { int i, j; gchar *text2 = g_malloc(strlen(text) + 1); for (i = 0, j = 0; text[i]; i++) if (text[i] != '\r') text2[j++] = text[i]; text2[j] = '\0'; strcpy(text, text2); g_free(text2); } char *add_cr(const char *text) { char *ret = NULL; int count = 0, i, j; if (text[0] == '\n') count++; for (i = 1; i < strlen(text); i++) if (text[i] == '\n' && text[i - 1] != '\r') count++; if (count == 0) return g_strdup(text); ret = g_malloc0(strlen(text) + count + 1); i = 0; j = 0; if (text[i] == '\n') ret[j++] = '\r'; ret[j++] = text[i++]; for (; i < strlen(text); i++) { if (text[i] == '\n' && text[i - 1] != '\r') ret[j++] = '\r'; ret[j++] = text[i]; } gaim_debug(GAIM_DEBUG_INFO, "add_cr", "got: %s, leaving with %s\n", text, ret); return ret; } time_t get_time(int year, int month, int day, int hour, int min, int sec) { struct tm tm; tm.tm_year = year - 1900; tm.tm_mon = month - 1; tm.tm_mday = day; tm.tm_hour = hour; tm.tm_min = min; tm.tm_sec = sec >= 0 ? sec : time(NULL) % 60; return mktime(&tm); } /* * Like mkstemp() but returns a file pointer, uses a pre-set template, * uses the semantics of tempnam() for the directory to use and allocates * the space for the filepath. * * Caller is responsible for closing the file and removing it when done, * as well as freeing the space pointed-to by "path" with g_free(). * * Returns NULL on failure and cleans up after itself if so. */ static const char *gaim_mkstemp_templ = {"gaimXXXXXX"}; FILE *gaim_mkstemp(gchar **fpath) { const gchar *tmpdir; #ifndef _WIN32 int fd; #endif FILE *fp = NULL; if((tmpdir = (gchar*)g_get_tmp_dir()) != NULL) { if((*fpath = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", tmpdir, gaim_mkstemp_templ)) != NULL) { #ifdef _WIN32 char* result = _mktemp( *fpath ); if( result == NULL ) gaim_debug(GAIM_DEBUG_ERROR, "gaim_mkstemp", "Problem creating the template\n"); else { if( (fp = fopen( result, "w+" )) == NULL ) { gaim_debug(GAIM_DEBUG_ERROR, "gaim_mkstemp", "Couldn't fopen() %s\n", result); } } #else if((fd = mkstemp(*fpath)) == -1) { gaim_debug(GAIM_DEBUG_ERROR, "gaim_mkstemp", "Couldn't make \"%s\", error: %d\n", *fpath, errno); } else { if((fp = fdopen(fd, "r+")) == NULL) { close(fd); gaim_debug(GAIM_DEBUG_ERROR, "gaim_mkstemp", "Couldn't fdopen(), error: %d\n", errno); } } #endif if(!fp) { g_free(*fpath); *fpath = NULL; } } } else { gaim_debug(GAIM_DEBUG_ERROR, "gaim_mkstemp", "g_get_tmp_dir() failed!"); } return fp; } gboolean program_is_valid(const char *program) { GError *error = NULL; char **argv; gchar *progname; gboolean is_valid = FALSE; if (program == NULL || *program == '\0') { return FALSE; } if (!g_shell_parse_argv(program, NULL, &argv, &error)) { gaim_debug(GAIM_DEBUG_ERROR, "program_is_valid", "Could not parse program '%s': %s\n", program, error->message); g_error_free(error); return FALSE; } if (argv == NULL) { return FALSE; } progname = g_find_program_in_path(argv[0]); is_valid = (progname != NULL); g_strfreev(argv); g_free(progname); return is_valid; } char *gaim_try_conv_to_utf8(const char *str) { gsize converted; char *utf8; if (str == NULL) { return NULL; } if (g_utf8_validate(str, -1, NULL)) { return g_strdup(str); } utf8 = g_locale_to_utf8(str, -1, &converted, NULL, NULL); if (utf8) return(utf8); g_free(utf8); utf8 = g_convert(str, -1, "UTF-8", "ISO-8859-15", &converted, NULL, NULL); if (utf8 && converted == strlen (str)) { return(utf8); } else if (utf8) { g_free(utf8); } return(NULL); } char *gaim_getip_from_fd(int fd) { struct sockaddr addr; socklen_t namelen = sizeof(addr); if (getsockname(fd, &addr, &namelen)) return NULL; return g_strdup(inet_ntoa(((struct sockaddr_in *)&addr)->sin_addr)); } gint gaim_utf8_strcasecmp(const gchar *a, const gchar *b) { gchar *a_norm=NULL; gchar *b_norm=NULL; gint ret=-1; if(!a && b) return -1; else if(!b && a) return 1; else if(!a && !b) return 0; if(!g_utf8_validate(a, -1, NULL) || !g_utf8_validate(b, -1, NULL)) { gaim_debug(GAIM_DEBUG_ERROR, "gaim_utf8_strcasecmp", "One or both parameters are invalid UTF8\n"); return ret; } a_norm = g_utf8_casefold(a, -1); b_norm = g_utf8_casefold(b, -1); ret = g_utf8_collate(a_norm, b_norm); g_free(a_norm); g_free(b_norm); return ret; } gchar *gaim_strreplace(const gchar *string, const gchar *delimiter, const gchar *replacement) { gchar **split; gchar *ret; split = g_strsplit(string, delimiter, 0); ret = g_strjoinv(replacement, split); g_strfreev(split); return ret; } const char *gaim_strcasestr(const char *haystack, const char *needle) { size_t hlen, nlen; const char *tmp, *ret; g_return_val_if_fail(haystack != NULL, NULL); g_return_val_if_fail(needle != NULL, NULL); hlen = strlen(haystack); nlen = strlen(needle); tmp = haystack, ret = NULL; g_return_val_if_fail(hlen > 0, NULL); g_return_val_if_fail(nlen > 0, NULL); while (*tmp && !ret) { if (!g_ascii_strncasecmp(needle, tmp, nlen)) ret = tmp; else tmp++; } return ret; } char *gaim_get_size_string(size_t size) { static const char *size_str[4] = { "bytes", "KB", "MB", "GB" }; float size_mag; int size_index = 0; if (size == -1) { return g_strdup(_("Calculating...")); } else if (size == 0) { return g_strdup(_("Unknown.")); } else { size_mag = (float)size; while ((size_index < 4) && (size_mag > 1024)) { size_mag /= 1024; size_index++; } return g_strdup_printf("%.2f %s", size_mag, size_str[size_index]); } } gboolean gaim_markup_find_tag(const char *needle, const char *haystack, const char **start, const char **end, GData **attributes) { GData *attribs; const char *cur = haystack; char *name = NULL; gboolean found = FALSE; gboolean in_tag = FALSE; gboolean in_attr = FALSE; const char *in_quotes = NULL; size_t needlelen = strlen(needle); g_datalist_init(&attribs); while (*cur && !found) { if (in_tag) { if (in_quotes) { const char *close = cur; while (*close && *close != *in_quotes) close++; /* if we got the close quote, store the value and carry on from * * after it. if we ran to the end of the string, point to the NULL * * and we're outta here */ if (*close) { /* only store a value if we have an attribute name */ if (name) { size_t len = close - cur; char *val = g_strndup(cur, len); g_datalist_set_data_full(&attribs, name, val, g_free); g_free(name); name = NULL; } in_quotes = NULL; cur = close + 1; } else { cur = close; } } else if (in_attr) { const char *close = cur; while (*close && *close != '>' && *close != '"' && *close != '\'' && *close != ' ' && *close != '=') close++; /* if we got the equals, store the name of the attribute. if we got * the quote, save the attribute and go straight to quote mode. * otherwise the tag closed or we reached the end of the string, * so we can get outta here */ switch (*close) { case '"': case '\'': in_quotes = close; case '=': { size_t len = close - cur; /* don't store a blank attribute name */ if (len) { if (name) g_free(name); name = g_ascii_strdown(cur, len); } in_attr = FALSE; cur = close + 1; break; } case ' ': case '>': in_attr = FALSE; default: cur = close; break; } } else { switch (*cur) { case ' ': /* swallow extra spaces inside tag */ while (*cur && *cur == ' ') cur++; in_attr = TRUE; break; case '>': found = TRUE; *end = cur; break; case '"': case '\'': in_quotes = cur; default: cur++; break; } } } else { /* if we hit a < followed by the name of our tag... */ if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) { *start = cur; cur = cur + needlelen + 1; /* if we're pointing at a space or a >, we found the right tag. if * * we're not, we've found a longer tag, so we need to skip to the * * >, but not being distracted by >s inside quotes. */ if (*cur == ' ' || *cur == '>') { in_tag = TRUE; } else { while (*cur && *cur != '"' && *cur != '>') { if (*cur == '"') { cur++; while (*cur && *cur != '"') cur++; } else { cur++; } } } } else { cur++; } } } /* clean up any attribute name from a premature termination */ if (name) g_free(name); if (found) { *attributes = attribs; } else { *start = NULL; *end = NULL; *attributes = NULL; } return found; } gboolean gaim_markup_extract_info_field(const char *str, char *dest_buffer, const char *start_token, int skip, const char *end_token, char check_value, const char *no_value_token, const char *display_name, gboolean is_link, const char *link_prefix) { const char *p, *q; char buf[1024]; g_return_val_if_fail(str != NULL, FALSE); g_return_val_if_fail(dest_buffer != NULL, FALSE); g_return_val_if_fail(start_token != NULL, FALSE); g_return_val_if_fail(end_token != NULL, FALSE); g_return_val_if_fail(display_name != NULL, FALSE); p = strstr(str, start_token); if (p == NULL) return FALSE; p += strlen(start_token) + skip; if (check_value != '\0' && *p == check_value) return FALSE; q = strstr(p, end_token); if (q != NULL && (!no_value_token || (no_value_token && strncmp(p, no_value_token, strlen(no_value_token))))) { strcat(dest_buffer, "<b>"); strcat(dest_buffer, display_name); strcat(dest_buffer, ":</b> "); if (is_link) { strcat(dest_buffer, "<br><a href=\""); memcpy(buf, p, q - p); buf[q - p] = '\0'; if (link_prefix) strcat(dest_buffer, link_prefix); strcat(dest_buffer, buf); strcat(dest_buffer, "\">"); if (link_prefix) strcat(dest_buffer, link_prefix); strcat(dest_buffer, buf); strcat(dest_buffer, "</a>"); } else { memcpy(buf, p, q - p); buf[q - p] = '\0'; strcat(dest_buffer, buf); } strcat(dest_buffer, "<br>\n"); return TRUE; } return FALSE; } struct gaim_parse_tag { char *src_tag; char *dest_tag; }; #define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \ const char *o = c + strlen("<" x); \ const char *p = NULL, *q = NULL, *r = NULL; \ GString *innards = g_string_new(""); \ while(o && *o) { \ if(!q && (*o == '\"' || *o == '\'') ) { \ q = o; \ } else if(q) { \ if(*o == *q) { \ char *unescaped = g_strndup(q+1, o-q-1); \ char *escaped = g_markup_escape_text(unescaped, -1); \ g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \ g_free(unescaped); \ g_free(escaped); \ q = NULL; \ } else if(*c == '\\') { \ o++; \ } \ } else if(*o == '<') { \ r = o; \ } else if(*o == '>') { \ p = o; \ break; \ } else { \ innards = g_string_append_c(innards, *o); \ } \ o++; \ } \ if(p && !r) { \ if(*(p-1) != '/') { \ struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \ pt->src_tag = x; \ pt->dest_tag = y; \ tags = g_list_prepend(tags, pt); \ } \ xhtml = g_string_append(xhtml, "<" y); \ c += strlen("<" x ); \ xhtml = g_string_append(xhtml, innards->str); \ xhtml = g_string_append_c(xhtml, '>'); \ c = p + 1; \ } else { \ xhtml = g_string_append(xhtml, "<"); \ plain = g_string_append_c(plain, '<'); \ c++; \ } \ g_string_free(innards, TRUE); \ continue; \ } \ if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \ (*(c+strlen("<" x)) == '>' || \ !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \ xhtml = g_string_append(xhtml, "<" y); \ c += strlen("<" x); \ if(*c != '/') { \ struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \ pt->src_tag = x; \ pt->dest_tag = y; \ tags = g_list_prepend(tags, pt); \ xhtml = g_string_append_c(xhtml, '>'); \ } else { \ xhtml = g_string_append(xhtml, "/>");\ } \ c = strchr(c, '>') + 1; \ continue; \ } #define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x) void gaim_markup_html_to_xhtml(const char *html, char **xhtml_out, char **plain_out) { GString *xhtml = g_string_new(""); GString *plain = g_string_new(""); GList *tags = NULL, *tag; const char *c = html; while(c && *c) { if(*c == '<') { if(*(c+1) == '/') { /* closing tag */ tag = tags; while(tag) { struct gaim_parse_tag *pt = tag->data; if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') { c += strlen(pt->src_tag) + 3; break; } tag = tag->next; } if(tag) { while(tags) { struct gaim_parse_tag *pt = tags->data; g_string_append_printf(xhtml, "</%s>", pt->dest_tag); if(tags == tag) break; tags = g_list_remove(tags, pt); g_free(pt); } g_free(tag->data); tags = g_list_remove(tags, tag->data); } else { /* we tried to close a tag we never opened! escape it * and move on */ xhtml = g_string_append(xhtml, "<"); plain = g_string_append_c(plain, '<'); c++; } } else { /* opening tag */ ALLOW_TAG("a"); ALLOW_TAG_ALT("b", "strong"); ALLOW_TAG("blockquote"); ALLOW_TAG_ALT("bold", "strong"); ALLOW_TAG("cite"); ALLOW_TAG("div"); ALLOW_TAG("em"); ALLOW_TAG("h1"); ALLOW_TAG("h2"); ALLOW_TAG("h3"); ALLOW_TAG("h4"); ALLOW_TAG("h5"); ALLOW_TAG("h6"); /* we only allow html to start the message */ if(c == html) ALLOW_TAG("html"); ALLOW_TAG_ALT("i", "em"); ALLOW_TAG_ALT("italic", "em"); ALLOW_TAG("li"); ALLOW_TAG("ol"); ALLOW_TAG("p"); ALLOW_TAG("pre"); ALLOW_TAG("q"); ALLOW_TAG("span"); ALLOW_TAG("strong"); ALLOW_TAG("ul"); /* we skip <HR> because it's not legal in XHTML-IM. However, * we still want to send something sensible, so we put a * linebreak in its place. <BR> also needs special handling * because putting a </BR> to close it would just be dumb. */ if((!g_ascii_strncasecmp(c, "<br", 3) || !g_ascii_strncasecmp(c, "<hr", 3)) && (*(c+3) == '>' || !g_ascii_strncasecmp(c+3, "/>", 2) || !g_ascii_strncasecmp(c+3, " />", 3))) { c = strchr(c, '>') + 1; xhtml = g_string_append(xhtml, "<br/>"); if(*c != '\n') plain = g_string_append_c(plain, '\n'); continue; } if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) { struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); pt->src_tag = *(c+2) == '>' ? "u" : "underline"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); c = strchr(c, '>') + 1; xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>"); continue; } if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) { struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); pt->src_tag = *(c+2) == '>' ? "s" : "strike"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); c = strchr(c, '>') + 1; xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>"); continue; } if(!g_ascii_strncasecmp(c, "<sub>", 5)) { struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); pt->src_tag = "sub"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); c = strchr(c, '>') + 1; xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>"); continue; } if(!g_ascii_strncasecmp(c, "<sup>", 5)) { struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); pt->src_tag = "sup"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); c = strchr(c, '>') + 1; xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>"); continue; } if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) { const char *p = c; GString *style = g_string_new(""); struct gaim_parse_tag *pt; while(*p && *p != '>') { if(!g_ascii_strncasecmp(p, "color=", strlen("color="))) { const char *q = p + strlen("color="); GString *color = g_string_new(""); if(*q == '\'' || *q == '\"') q++; while(*q && *q != '\"' && *q != '\'' && *q != ' ') { color = g_string_append_c(color, *q); q++; } g_string_append_printf(style, "color: %s; ", color->str); g_string_free(color, TRUE); p = q; } else if(!g_ascii_strncasecmp(p, "face=", strlen("face="))) { const char *q = p + strlen("face="); gboolean space_allowed = FALSE; GString *face = g_string_new(""); if(*q == '\'' || *q == '\"') { space_allowed = TRUE; q++; } while(*q && *q != '\"' && *q != '\'' && (space_allowed || *q != ' ')) { face = g_string_append_c(face, *q); q++; } g_string_append_printf(style, "font-family: %s; ", face->str); g_string_free(face, TRUE); p = q; } else if(!g_ascii_strncasecmp(p, "size=", strlen("size="))) { const char *q = p + strlen("size="); int sz; const char *size = "medium"; if(*q == '\'' || *q == '\"') q++; sz = atoi(q); if(sz < 3) size = "smaller"; else if(sz > 3) size = "larger"; g_string_append_printf(style, "font-size: %s; ", size); p = q; } p++; } c = strchr(c, '>') + 1; pt = g_new0(struct gaim_parse_tag, 1); pt->src_tag = "font"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); xhtml = g_string_append(xhtml, "<span"); if(style->len) g_string_append_printf(xhtml, " style='%s'", style->str); xhtml = g_string_append_c(xhtml, '>'); g_string_free(style, TRUE); continue; } if(!g_ascii_strncasecmp(c, "<body ", 6)) { const char *p = c; gboolean did_something = FALSE; while(*p && *p != '>') { if(!g_ascii_strncasecmp(p, "bgcolor=", strlen("bgcolor="))) { const char *q = p + strlen("bgcolor="); struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); GString *color = g_string_new(""); if(*q == '\'' || *q == '\"') q++; while(*q && *q != '\"' && *q != '\'' && *q != ' ') { color = g_string_append_c(color, *q); q++; } g_string_append_printf(xhtml, "<span style='background: %s;'>", color->str); g_string_free(color, TRUE); c = strchr(c, '>') + 1; pt->src_tag = "body"; pt->dest_tag = "span"; tags = g_list_prepend(tags, pt); did_something = TRUE; break; } p++; } if(did_something) continue; } /* this has to come after the special case for bgcolor */ ALLOW_TAG("body"); if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) { char *p = strstr(c + strlen("<!--"), "-->"); if(p) { xhtml = g_string_append(xhtml, "<!--"); c += strlen("<!--"); continue; } } xhtml = g_string_append(xhtml, "<"); plain = g_string_append_c(plain, '<'); c++; } } else { xhtml = g_string_append_c(xhtml, *c); plain = g_string_append_c(plain, *c); c++; } } tag = tags; while(tag) { g_string_append_printf(xhtml, "</%s>", (char *)tag->data); tag = tag->next; } g_list_free(tags); if(xhtml_out) *xhtml_out = g_strdup(xhtml->str); if(plain_out) *plain_out = g_strdup(plain->str); g_string_free(xhtml, TRUE); g_string_free(plain, TRUE); } char * gaim_markup_strip_html(const char *str) { int i, j, k; gboolean visible = TRUE; gchar *str2; g_return_val_if_fail(str != NULL, NULL); str2 = g_strdup(str); for (i = 0, j = 0; str2[i]; i++) { if (str2[i] == '<') { k = i + 1; if(g_ascii_isspace(str2[k])) visible = TRUE; else { while (str2[k]) { if (str2[k] == '<') { visible = TRUE; break; } if (str2[k] == '>') { visible = FALSE; break; } k++; } } } else if (str2[i] == '>' && !visible) { visible = TRUE; continue; } if (str2[i] == '&' && strncasecmp(str2 + i, """, 6) == 0) { str2[j++] = '\"'; i = i + 5; continue; } if (visible) str2[j++] = str2[i]; } str2[j] = '\0'; return str2; } gboolean gaim_url_parse(const char *url, char **ret_host, int *ret_port, char **ret_path) { char scan_info[255]; char port_str[5]; int f; const char *turl; char host[256], path[256]; int port = 0; /* hyphen at end includes it in control set */ static char addr_ctrl[] = "A-Za-z0-9.-"; static char port_ctrl[] = "0-9"; static char page_ctrl[] = "A-Za-z0-9.~_/:*!@&%%?=+^-"; g_return_val_if_fail(url != NULL, FALSE); if ((turl = strstr(url, "http://")) != NULL || (turl = strstr(url, "HTTP://")) != NULL) { turl += 7; url = turl; } g_snprintf(scan_info, sizeof(scan_info), "%%[%s]:%%[%s]/%%[%s]", addr_ctrl, port_ctrl, page_ctrl); f = sscanf(url, scan_info, host, port_str, path); if (f == 1) { g_snprintf(scan_info, sizeof(scan_info), "%%[%s]/%%[%s]", addr_ctrl, page_ctrl); f = sscanf(url, scan_info, host, path); g_snprintf(port_str, sizeof(port_str), "80"); } if (f == 1) *path = '\0'; sscanf(port_str, "%d", &port); if (ret_host != NULL) *ret_host = g_strdup(host); if (ret_port != NULL) *ret_port = port; if (ret_path != NULL) *ret_path = g_strdup(path); return TRUE; } static void destroy_fetch_url_data(GaimFetchUrlData *gfud) { if (gfud->webdata != NULL) g_free(gfud->webdata); if (gfud->url != NULL) g_free(gfud->url); if (gfud->user_agent != NULL) g_free(gfud->user_agent); if (gfud->website.address != NULL) g_free(gfud->website.address); if (gfud->website.page != NULL) g_free(gfud->website.page); g_free(gfud); } static gboolean parse_redirect(const char *data, size_t data_len, gint sock, GaimFetchUrlData *gfud) { gchar *s; if ((s = g_strstr_len(data, data_len, "Location: ")) != NULL) { gchar *new_url, *temp_url, *end; gboolean full; int len; s += strlen("Location: "); end = strchr(s, '\r'); /* Just in case :) */ if (end == NULL) end = strchr(s, '\n'); len = end - s; new_url = g_malloc(len + 1); strncpy(new_url, s, len); new_url[len] = '\0'; full = gfud->full; if (*new_url == '/' || g_strstr_len(new_url, len, "://") == NULL) { temp_url = new_url; new_url = g_strdup_printf("%s:%d%s", gfud->website.address, gfud->website.port, temp_url); g_free(temp_url); full = FALSE; } /* Close the existing stuff. */ gaim_input_remove(gfud->inpa); close(sock); gaim_debug_info("gaim_url_fetch", "Redirecting to %s\n", new_url); /* Try again, with this new location. */ gaim_url_fetch(new_url, full, gfud->user_agent, gfud->http11, gfud->callback, gfud->user_data); /* Free up. */ g_free(new_url); destroy_fetch_url_data(gfud); return TRUE; } return FALSE; } static size_t parse_content_len(const char *data, size_t data_len) { size_t content_len = 0; sscanf(data, "Content-Length: %d", &content_len); return content_len; } static void url_fetched_cb(gpointer url_data, gint sock, GaimInputCondition cond) { GaimFetchUrlData *gfud = url_data; char data; if (sock == -1) { gfud->callback(gfud->user_data, NULL, 0); destroy_fetch_url_data(gfud); return; } if (!gfud->sentreq) { char buf[1024]; if (gfud->user_agent) { if (gfud->http11) { g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.1\r\n" "User-Agent: \"%s\"\r\n" "Host: %s\r\n\r\n", (gfud->full ? "" : "/"), (gfud->full ? gfud->url : gfud->website.page), gfud->user_agent, gfud->website.address); } else { g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.0\r\n" "User-Agent: \"%s\"\r\n\r\n", (gfud->full ? "" : "/"), (gfud->full ? gfud->url : gfud->website.page), gfud->user_agent); } } else { if (gfud->http11) { g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.1\r\n" "Host: %s\r\n\r\n", (gfud->full ? "" : "/"), (gfud->full ? gfud->url : gfud->website.page), gfud->website.address); } else { g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.0\r\n\r\n", (gfud->full ? "" : "/"), (gfud->full ? gfud->url : gfud->website.page)); } } gaim_debug_misc("gaim_url_fetch", "Request: %s\n", buf); write(sock, buf, strlen(buf)); fcntl(sock, F_SETFL, O_NONBLOCK); gfud->sentreq = TRUE; gfud->inpa = gaim_input_add(sock, GAIM_INPUT_READ, url_fetched_cb, url_data); gfud->data_len = 4096; gfud->webdata = g_malloc(gfud->data_len); return; } if (read(sock, &data, 1) > 0 || errno == EWOULDBLOCK) { if (errno == EWOULDBLOCK) { errno = 0; return; } gfud->len++; if (gfud->len == gfud->data_len + 1) { gfud->data_len += (gfud->data_len) / 2; gfud->webdata = g_realloc(gfud->webdata, gfud->data_len); } gfud->webdata[gfud->len - 1] = data; if (!gfud->startsaving) { if (data == '\r') return; if (data == '\n') { if (gfud->newline) { size_t content_len; gfud->startsaving = TRUE; /* See if we can find a redirect. */ if (parse_redirect(gfud->webdata, gfud->len, sock, gfud)) return; /* No redirect. See if we can find a content length. */ content_len = parse_content_len(gfud->webdata, gfud->len); if (content_len == 0) { /* We'll stick with an initial 8192 */ content_len = 8192; } /* Out with the old... */ gfud->len = 0; g_free(gfud->webdata); gfud->webdata = NULL; /* In with the new. */ gfud->data_len = content_len; gfud->webdata = g_malloc(gfud->data_len); } else gfud->newline = TRUE; return; } gfud->newline = FALSE; } } else if (errno != ETIMEDOUT) { gfud->webdata = g_realloc(gfud->webdata, gfud->len + 1); gfud->webdata[gfud->len] = 0; gaim_debug_misc("gaim_url_fetch", "Received: '%s'\n", gfud->webdata); gaim_input_remove(gfud->inpa); close(sock); gfud->callback(gfud->user_data, gfud->webdata, gfud->len); if (gfud->webdata) g_free(gfud->webdata); destroy_fetch_url_data(gfud); } else { gaim_input_remove(gfud->inpa); close(sock); gfud->callback(gfud->user_data, NULL, 0); destroy_fetch_url_data(gfud); } } void gaim_url_fetch(const char *url, gboolean full, const char *user_agent, gboolean http11, void (*cb)(gpointer, const char *, size_t), void *user_data) { int sock; GaimFetchUrlData *gfud; g_return_if_fail(url != NULL); g_return_if_fail(cb != NULL); gfud = g_new0(GaimFetchUrlData, 1); gfud->callback = cb; gfud->user_data = user_data; gfud->url = g_strdup(url); gfud->user_agent = (user_agent != NULL ? g_strdup(user_agent) : NULL); gfud->http11 = http11; gfud->full = full; gaim_url_parse(url, &gfud->website.address, &gfud->website.port, &gfud->website.page); if ((sock = gaim_proxy_connect(NULL, gfud->website.address, gfud->website.port, url_fetched_cb, gfud)) < 0) { destroy_fetch_url_data(gfud); cb(user_data, g_strdup(_("g003: Error opening connection.\n")), 0); } }