view src/html.c @ 780:c714def9cebb

[gaim-migrate @ 790] You may be a geek if... You've ever used a computer on Friday, Saturday and Sunday of the same weekend. You find yourself interrupting computer store salesman to correct something he said. The first thing you notice when walking in a business is their computer system. ...and offer advice on how you would change it. You've ever mounted a magnetic tape reel. You own any shareware. You know more IP addresses than phone numbers. You've ever accidentally dialed an IP address. Your friends use you as tech support. You've ever named a computer. You have your local computer store on speed dial. You can't carry on a conversation without talking about computers. Co-workers have to E-mail you about the fire alarm to get you out of the building. You've ever found "stray" diskettes when doing laundry. Your computer has it's own phone line - but your teenager doesn't. You check the national weather service web page for current weather conditions (rather than look out the window). You know more URLs than street addresses. Your pet has a web page. You get really excited when Yahoo adds your link. committer: Tailor Script <tailor@pidgin.im>
author Eric Warmenhoven <eric@warmenhoven.org>
date Tue, 29 Aug 2000 03:59:01 +0000
parents 104a2659b358
children 56c7ceb986a8
line wrap: on
line source

/*
 * gaim
 *
 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#ifdef HAVE_CONFIG_H
#include "../config.h"
#endif
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>
#include <gtk/gtk.h>
#include <gdk/gdkprivate.h>
#include <gdk/gdkx.h>
#include "gaim.h"
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <netinet/in.h>
#include <fcntl.h>
#include <errno.h>

gchar * strip_html(gchar * text)
{
	int i, j;
	int visible = 1;
	gchar *text2 = g_malloc(strlen(text) + 1);
	
	strcpy(text2, text);
	for (i = 0, j = 0;text2[i]; i++)
	{
		if(text2[i]=='<')
		{	
			visible = 0;
			continue;
		}
		else if(text2[i]=='>')
		{
			visible = 1;
			continue;
		}
		if(visible)
		{
			text2[j++] = text2[i];
		}
	}
	text2[j] = '\0';
	return text2;
}

struct g_url parse_url(char *url)
{
	struct g_url test;
	char scan_info[255];
	char port[5];
	int f;

	if (strstr(url, "http://"))
		g_snprintf(scan_info, sizeof(scan_info), "http://%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?]");
	else
		g_snprintf(scan_info, sizeof(scan_info), "%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?]");
	f = sscanf(url, scan_info, test.address, port, test.page);
	if (f == 1) {
		if (strstr(url, "http://"))
			g_snprintf(scan_info, sizeof(scan_info), "http://%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?]");
		else
			g_snprintf(scan_info, sizeof(scan_info), "%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?]");
        f = sscanf(url, scan_info, test.address, test.page);
        g_snprintf(port, sizeof(test.port), "80");
        port[2] = 0;
	}
	if (f == 1) {
		if (strstr(url, "http://"))
			g_snprintf(scan_info, sizeof(scan_info), "http://%%[A-Za-z0-9.]");
        else
		g_snprintf(scan_info, sizeof(scan_info), "%%[A-Za-z0-9.]");
		f = sscanf(url, scan_info, test.address);
		g_snprintf(test.page, sizeof(test.page), "%c", '\0');
	}

	sscanf(port, "%d", &test.port);
	return test;
}

char *grab_url(char *url)
{
	struct g_url website;
	char *webdata = NULL;
        int sock;
        int len;
	int read_rv;
	int datalen = 0;
	struct in_addr *host;
	char buf[256];
	char data;
        int startsaving = 0;
        GtkWidget *pw = NULL, *pbar = NULL, *label;

        website = parse_url(url);

	host = (struct in_addr *)get_address(website.address);
	if (!host) { return g_strdup(_("g001: Error resolving host\n")); }
	if ((sock = connect_address(host->s_addr, website.port)) < 0)
		return g_strdup(_("g003: Error opening connection.\n"));

	g_snprintf(buf, sizeof(buf), "GET /%s HTTP/1.0\r\n\r\n", website.page);
	g_snprintf(debug_buff, sizeof(debug_buff), "Request: %s\n", buf);
	debug_print(debug_buff);
	write(sock, buf, strlen(buf));
	fcntl(sock, F_SETFL, O_NONBLOCK);

        webdata = NULL;
        len = 0;
	
	/*
	 * avoid fgetc(), it causes problems on solaris
	while ((data = fgetc(sockfile)) != EOF) {
	*/
	/* read_rv will be 0 on EOF and < 0 on error, so this should be fine */
	while ((read_rv = read(sock, &data, 1)) > 0 || errno == EWOULDBLOCK) {
		if (errno == EWOULDBLOCK) {
			errno = 0;
			continue;
		}

		if (!data)
			continue;
		
		if (!startsaving && data == '<') {
#ifdef HAVE_STRSTR
			char *cs = strstr(webdata, "Content-Length");
			if (cs) {
				char tmpbuf[1024];
				sscanf(cs, "Content-Length: %d", &datalen);

                                g_snprintf(tmpbuf, 1024, _("Getting %d bytes from %s"), datalen, url);
                                pw = gtk_dialog_new();

				label = gtk_label_new(tmpbuf);
				gtk_widget_show(label);
				gtk_box_pack_start(GTK_BOX(GTK_DIALOG(pw)->vbox),
						   label, FALSE, FALSE, 5);
				
				pbar = gtk_progress_bar_new();
				gtk_box_pack_start(GTK_BOX(GTK_DIALOG(pw)->action_area),
						   pbar, FALSE, FALSE, 5);
                                gtk_widget_show(pbar);
                                
                                gtk_window_set_title(GTK_WINDOW(pw), _("Getting Data"));
                                
                                gtk_widget_realize(pw);
                                aol_icon(pw->window);

                                gtk_widget_show(pw);
                        } else
				datalen = 0;
#else
			datalen = 0;
#endif
			g_free(webdata);
			webdata = NULL;
			len = 0;
			startsaving = 1;
		}

		len++;
		webdata = g_realloc(webdata, len);
		webdata[len - 1] = data;

		if (pbar)
			gtk_progress_bar_update(GTK_PROGRESS_BAR(pbar),
                                                ((100 * len) / datalen) / 100.0);
		
		while (gtk_events_pending())
			gtk_main_iteration();
	}

        webdata = g_realloc(webdata, len+1);
        webdata[len] = 0;


        g_snprintf(debug_buff, sizeof(debug_buff), _("Receieved: '%s'\n"), webdata);
        debug_print(debug_buff);

        if (pw)
                gtk_widget_destroy(pw);

	close(sock);
	return webdata;
}

char *fix_url(gchar *buf)
{
	char *new,*tmp;
	int size;

	size=8;
	size+=strlen(quad_addr);
	tmp=strchr(strchr(buf,':')+1,':');
	size+=strlen(tmp);
	new=g_malloc(size);
	strcpy(new,"http://");
	strcat(new,quad_addr);
	strcat(new,tmp);
	return(new);
}