changeset 9240:f1d87ab17e41

[gaim-migrate @ 10039]
wing writes:

While working on patch 967849, I found that there are a few problems with the current Content-Length handling:

1. parse_content_len uses a single sscanf on data. However, data (the response headers) begins with the HTTP status line, not the Content-Length header.
2. data is not nul-terminated, but sscanf assumes a nul-terminated string. The result could (though this is very unlikely) be unpredictable if the input is malformed.
3. If gaim_url_fetch succeeds in extracting the Content-Length from the response headers, it silently discards all the data after it has been read, because it does not keep track of whether the specified number of bytes has been read. (The "else" case that corresponds to this situation is an empty block.)

The attached patch corrects the above problems, though the patch is still technically not correct, since Content-Length should in theory be matched case-insensitively.

He then revised the patch and wrote:

More problems, unrelated to Content-Length handling, have been discovered:

1. Gaim's HTTP/1.1 support is broken; HTTP/1.1 clients are _required_ to be able to decode the "chunked" transfer encoding, but Gaim cannot do it. Since the only difference between Gaim's HTTP/1.0 and HTTP/1.1 support is sending the Host header, which is not forbidden in HTTP/1.0, I have changed Gaim's HTTP/1.0 behaviour to also send the Host header. This is required for Yahoo Japan "avatar" support. (The server requires a Host header but sends the image in the chunked encoding, which Gaim cannot decode.)
2. User-Agent should not be quoted.

committer: Tailor Script <tailor@pidgin.im>
author Tim Ringenbach <marv@pidgin.im>
date Wed, 09 Jun 2004 01:24:47 +0000
parents 3434d420fe69
children 5e9018c81bd9
files src/util.c
diffstat 1 files changed, 72 insertions(+), 46 deletions(-) [+]
line wrap: on
line diff
--- a/src/util.c	Tue Jun 08 23:32:08 2004 +0000
+++ b/src/util.c	Wed Jun 09 01:24:47 2004 +0000
@@ -57,6 +57,7 @@
 	gboolean sentreq;
 	gboolean newline;
 	gboolean startsaving;
+	gboolean has_explicit_data_len;
 	char *webdata;
 	unsigned long len;
 	unsigned long data_len;
@@ -2532,8 +2533,29 @@
 parse_content_len(const char *data, size_t data_len)
 {
 	size_t content_len = 0;
-
-	sscanf(data, "Content-Length: %d", (int *)&content_len);
+	const char *p = NULL;
+
+	/* This is still technically wrong, since headers are case-insensitive
+	 * [RFC 2616, section 4.2], though this ought to catch the normal case.
+	 * Note: data is _not_ nul-terminated.
+	 */
+	if (data_len > 16) {
+		p = strncmp(data, "Content-Length: ", 16) == 0? data: NULL;
+		if (!p) {
+			p = g_strstr_len(data, data_len, "\nContent-Length: ");
+			if (p)
+				p += 1;
+		}
+	}
+
+	/* If we can find a Content-Length header at all, try to sscanf it.
+	 * Response headers should end with at least \r\n, so sscanf is safe,
+	 * if we make sure that there is indeed a \n in our header.
+	 */
+	if (p && g_strstr_len(p, data_len - (p - data), "\n")) {
+		sscanf(p, "Content-Length: %d", (int *)&content_len);
+		gaim_debug_misc("parse_content_len", "parsed %d\n", content_len);
+	}
 
 	return content_len;
 }
@@ -2543,6 +2565,7 @@
 {
 	GaimFetchUrlData *gfud = url_data;
 	char data;
+	gboolean got_eof = FALSE;
 
 	if (sock == -1)
 	{
@@ -2559,44 +2582,29 @@
 
 		if (gfud->user_agent)
 		{
-			if (gfud->http11)
-			{
-				g_snprintf(buf, sizeof(buf),
-						   "GET %s%s HTTP/1.1\r\n"
-						   "User-Agent: \"%s\"\r\n"
-						   "Host: %s\r\n\r\n",
-						   (gfud->full ? "" : "/"),
-						   (gfud->full ? gfud->url : gfud->website.page),
-						   gfud->user_agent, gfud->website.address);
-			}
-			else
-			{
-				g_snprintf(buf, sizeof(buf),
-						   "GET %s%s HTTP/1.0\r\n"
-						   "User-Agent: \"%s\"\r\n\r\n",
-						   (gfud->full ? "" : "/"),
-						   (gfud->full ? gfud->url : gfud->website.page),
-						   gfud->user_agent);
-			}
+			/* Host header is not forbidden in HTTP/1.0 requests, and HTTP/1.1
+			 * clients must know how to handle the "chunked" transfer encoding.
+			 * Gaim doesn't know how to handle "chunked", so should always send
+			 * the Host header regardless, to get around some observed problems
+			 */
+			g_snprintf(buf, sizeof(buf),
+					   "GET %s%s HTTP/%s\r\n"
+					   "User-Agent: %s\r\n"
+					   "Host: %s\r\n\r\n",
+					   (gfud->full ? "" : "/"),
+					   (gfud->full ? gfud->url : gfud->website.page),
+					   (gfud->http11 ? "1.1" : "1.0"),
+					   gfud->user_agent, gfud->website.address);
 		}
 		else
 		{
-			if (gfud->http11)
-			{
-				g_snprintf(buf, sizeof(buf),
-						   "GET %s%s HTTP/1.1\r\n"
-						   "Host: %s\r\n\r\n",
-						   (gfud->full ? "" : "/"),
-						   (gfud->full ? gfud->url : gfud->website.page),
-						   gfud->website.address);
-			}
-			else
-			{
-				g_snprintf(buf, sizeof(buf),
-						   "GET %s%s HTTP/1.0\r\n\r\n",
-						   (gfud->full ? "" : "/"),
-						   (gfud->full ? gfud->url : gfud->website.page));
-			}
+			g_snprintf(buf, sizeof(buf),
+					   "GET %s%s HTTP/%s\r\n"
+					   "Host: %s\r\n\r\n",
+					   (gfud->full ? "" : "/"),
+					   (gfud->full ? gfud->url : gfud->website.page),
+					   (gfud->http11 ? "1.1" : "1.0"),
+					   gfud->website.address);
 		}
 
 		gaim_debug_misc("gaim_url_fetch", "Request: %s\n", buf);
@@ -2644,6 +2652,8 @@
 					size_t content_len;
 					gfud->startsaving = TRUE;
 
+					gaim_debug_misc("gaim_url_fetch", "Response headers: '%*.*s'\n", gfud->len, gfud->len, gfud->webdata);
+
 					/* See if we can find a redirect. */
 					if (parse_redirect(gfud->webdata, gfud->len, sock, gfud))
 						return;
@@ -2656,6 +2666,10 @@
 						/* We'll stick with an initial 8192 */
 						content_len = 8192;
 					}
+					else
+					{
+						gfud->has_explicit_data_len = TRUE;
+					}
 
 					/* Out with the old... */
 					gfud->len = 0;
@@ -2674,9 +2688,26 @@
 
 			gfud->newline = FALSE;
 		}
+		else if (gfud->has_explicit_data_len && gfud->len == gfud->data_len)
+		{
+			got_eof = TRUE;
+		}
 	}
 	else if (errno != ETIMEDOUT)
 	{
+		got_eof = TRUE;
+	}
+	else
+	{
+		gaim_input_remove(gfud->inpa);
+		close(sock);
+
+		gfud->callback(gfud->user_data, NULL, 0);
+
+		destroy_fetch_url_data(gfud);
+	}
+
+	if (got_eof) {
 		gfud->webdata = g_realloc(gfud->webdata, gfud->len + 1);
 		gfud->webdata[gfud->len] = 0;
 
@@ -2688,15 +2719,6 @@
 
 		destroy_fetch_url_data(gfud);
 	}
-	else
-	{
-		gaim_input_remove(gfud->inpa);
-		close(sock);
-
-		gfud->callback(gfud->user_data, NULL, 0);
-
-		destroy_fetch_url_data(gfud);
-	}
 }
 
 void
@@ -2708,6 +2730,10 @@
 	int sock;
 	GaimFetchUrlData *gfud;
 
+	gaim_debug_info("gaim_url_fetch",
+			 "requested to fetch (%s), full=%d, user_agent=(%s), http11=%d\n",
+			 url, full, user_agent, http11);
+
 	g_return_if_fail(url != NULL);
 	g_return_if_fail(cb  != NULL);