Mercurial > pidgin
comparison src/util.c @ 9240:f1d87ab17e41
[gaim-migrate @ 10039]
wing writes:
While working on patch 967849, I found that there are a
few problems with the current Content-Length handling:
1. parse_content_len uses a single sscanf on data.
However, data (the response headers) begins with the
HTTP status line, not the Content-Length header, so
the sscanf never matches.
2. data is not nul-terminated, but sscanf assumes a
nul-terminated string. This could (though it is very
unlikely) cause unpredictable behaviour if the input is malformed.
3. If gaim_url_fetch succeeds in extracting the
Content-Length from the response headers, it will
silently discard all the data after it has been
read, because it does not keep track of whether the
specified number of bytes has been read. (The "else"
case that corresponds to this situation is an empty block.)
The attached patch corrects the above problems, though
the patch is still technically not correct, since
Content-Length theoretically should be matched
case-insensitively.
He then revised the patch and wrote:
More problems, unrelated to content-length handling, have
been discovered:
1. Gaim's HTTP/1.1 support is broken; HTTP/1.1 clients are
_required_ to be able to decode the "chunked" transfer
encoding, but Gaim cannot do it. Since the only difference
between Gaim's HTTP/1.0 and HTTP/1.1 support is that the
latter sends the Host header, which is not forbidden in
HTTP/1.0, I have changed Gaim's HTTP/1.0 behaviour to also
send the Host header. This is required for Yahoo Japan "avatar" support.
(The server requires a Host header but sends the image in
the chunked encoding, which gaim cannot decode.)
2. The User-Agent header value should not be quoted.
committer: Tailor Script <tailor@pidgin.im>
author | Tim Ringenbach <marv@pidgin.im> |
---|---|
date | Wed, 09 Jun 2004 01:24:47 +0000 |
parents | 0c352d0e4ddc |
children | 5e9018c81bd9 |
comparison
equal
deleted
inserted
replaced
9239:3434d420fe69 | 9240:f1d87ab17e41 |
---|---|
55 int inpa; | 55 int inpa; |
56 | 56 |
57 gboolean sentreq; | 57 gboolean sentreq; |
58 gboolean newline; | 58 gboolean newline; |
59 gboolean startsaving; | 59 gboolean startsaving; |
60 gboolean has_explicit_data_len; | |
60 char *webdata; | 61 char *webdata; |
61 unsigned long len; | 62 unsigned long len; |
62 unsigned long data_len; | 63 unsigned long data_len; |
63 | 64 |
64 } GaimFetchUrlData; | 65 } GaimFetchUrlData; |
2530 | 2531 |
2531 static size_t | 2532 static size_t |
2532 parse_content_len(const char *data, size_t data_len) | 2533 parse_content_len(const char *data, size_t data_len) |
2533 { | 2534 { |
2534 size_t content_len = 0; | 2535 size_t content_len = 0; |
2535 | 2536 const char *p = NULL; |
2536 sscanf(data, "Content-Length: %d", (int *)&content_len); | 2537 |
2538 /* This is still technically wrong, since headers are case-insensitive | |
2539 * [RFC 2616, section 4.2], though this ought to catch the normal case. | |
2540 * Note: data is _not_ nul-terminated. | |
2541 */ | |
2542 if (data_len > 16) { | |
2543 p = strncmp(data, "Content-Length: ", 16) == 0? data: NULL; | |
2544 if (!p) { | |
2545 p = g_strstr_len(data, data_len, "\nContent-Length: "); | |
2546 if (p) | |
2547 p += 1; | |
2548 } | |
2549 } | |
2550 | |
2551 /* If we can find a Content-Length header at all, try to sscanf it. | |
2552 * Response headers should end with at least \r\n, so sscanf is safe, | |
2553 * if we make sure that there is indeed a \n in our header. | |
2554 */ | |
2555 if (p && g_strstr_len(p, data_len - (p - data), "\n")) { | |
2556 sscanf(p, "Content-Length: %d", (int *)&content_len); | |
2557 gaim_debug_misc("parse_content_len", "parsed %d\n", content_len); | |
2558 } | |
2537 | 2559 |
2538 return content_len; | 2560 return content_len; |
2539 } | 2561 } |
2540 | 2562 |
2541 static void | 2563 static void |
2542 url_fetched_cb(gpointer url_data, gint sock, GaimInputCondition cond) | 2564 url_fetched_cb(gpointer url_data, gint sock, GaimInputCondition cond) |
2543 { | 2565 { |
2544 GaimFetchUrlData *gfud = url_data; | 2566 GaimFetchUrlData *gfud = url_data; |
2545 char data; | 2567 char data; |
2568 gboolean got_eof = FALSE; | |
2546 | 2569 |
2547 if (sock == -1) | 2570 if (sock == -1) |
2548 { | 2571 { |
2549 gfud->callback(gfud->user_data, NULL, 0); | 2572 gfud->callback(gfud->user_data, NULL, 0); |
2550 | 2573 |
2557 { | 2580 { |
2558 char buf[1024]; | 2581 char buf[1024]; |
2559 | 2582 |
2560 if (gfud->user_agent) | 2583 if (gfud->user_agent) |
2561 { | 2584 { |
2562 if (gfud->http11) | 2585 /* Host header is not forbidden in HTTP/1.0 requests, and HTTP/1.1 |
2563 { | 2586 * clients must know how to handle the "chunked" transfer encoding. |
2564 g_snprintf(buf, sizeof(buf), | 2587 * Gaim doesn't know how to handle "chunked", so should always send |
2565 "GET %s%s HTTP/1.1\r\n" | 2588 * the Host header regardless, to get around some observed problems |
2566 "User-Agent: \"%s\"\r\n" | 2589 */ |
2567 "Host: %s\r\n\r\n", | 2590 g_snprintf(buf, sizeof(buf), |
2568 (gfud->full ? "" : "/"), | 2591 "GET %s%s HTTP/%s\r\n" |
2569 (gfud->full ? gfud->url : gfud->website.page), | 2592 "User-Agent: %s\r\n" |
2570 gfud->user_agent, gfud->website.address); | 2593 "Host: %s\r\n\r\n", |
2571 } | 2594 (gfud->full ? "" : "/"), |
2572 else | 2595 (gfud->full ? gfud->url : gfud->website.page), |
2573 { | 2596 (gfud->http11 ? "1.1" : "1.0"), |
2574 g_snprintf(buf, sizeof(buf), | 2597 gfud->user_agent, gfud->website.address); |
2575 "GET %s%s HTTP/1.0\r\n" | |
2576 "User-Agent: \"%s\"\r\n\r\n", | |
2577 (gfud->full ? "" : "/"), | |
2578 (gfud->full ? gfud->url : gfud->website.page), | |
2579 gfud->user_agent); | |
2580 } | |
2581 } | 2598 } |
2582 else | 2599 else |
2583 { | 2600 { |
2584 if (gfud->http11) | 2601 g_snprintf(buf, sizeof(buf), |
2585 { | 2602 "GET %s%s HTTP/%s\r\n" |
2586 g_snprintf(buf, sizeof(buf), | 2603 "Host: %s\r\n\r\n", |
2587 "GET %s%s HTTP/1.1\r\n" | 2604 (gfud->full ? "" : "/"), |
2588 "Host: %s\r\n\r\n", | 2605 (gfud->full ? gfud->url : gfud->website.page), |
2589 (gfud->full ? "" : "/"), | 2606 (gfud->http11 ? "1.1" : "1.0"), |
2590 (gfud->full ? gfud->url : gfud->website.page), | 2607 gfud->website.address); |
2591 gfud->website.address); | |
2592 } | |
2593 else | |
2594 { | |
2595 g_snprintf(buf, sizeof(buf), | |
2596 "GET %s%s HTTP/1.0\r\n\r\n", | |
2597 (gfud->full ? "" : "/"), | |
2598 (gfud->full ? gfud->url : gfud->website.page)); | |
2599 } | |
2600 } | 2608 } |
2601 | 2609 |
2602 gaim_debug_misc("gaim_url_fetch", "Request: %s\n", buf); | 2610 gaim_debug_misc("gaim_url_fetch", "Request: %s\n", buf); |
2603 | 2611 |
2604 write(sock, buf, strlen(buf)); | 2612 write(sock, buf, strlen(buf)); |
2642 if (gfud->newline) | 2650 if (gfud->newline) |
2643 { | 2651 { |
2644 size_t content_len; | 2652 size_t content_len; |
2645 gfud->startsaving = TRUE; | 2653 gfud->startsaving = TRUE; |
2646 | 2654 |
2655 gaim_debug_misc("gaim_url_fetch", "Response headers: '%*.*s'\n", gfud->len, gfud->len, gfud->webdata); | |
2656 | |
2647 /* See if we can find a redirect. */ | 2657 /* See if we can find a redirect. */ |
2648 if (parse_redirect(gfud->webdata, gfud->len, sock, gfud)) | 2658 if (parse_redirect(gfud->webdata, gfud->len, sock, gfud)) |
2649 return; | 2659 return; |
2650 | 2660 |
2651 /* No redirect. See if we can find a content length. */ | 2661 /* No redirect. See if we can find a content length. */ |
2654 if (content_len == 0) | 2664 if (content_len == 0) |
2655 { | 2665 { |
2656 /* We'll stick with an initial 8192 */ | 2666 /* We'll stick with an initial 8192 */ |
2657 content_len = 8192; | 2667 content_len = 8192; |
2658 } | 2668 } |
2669 else | |
2670 { | |
2671 gfud->has_explicit_data_len = TRUE; | |
2672 } | |
2659 | 2673 |
2660 /* Out with the old... */ | 2674 /* Out with the old... */ |
2661 gfud->len = 0; | 2675 gfud->len = 0; |
2662 g_free(gfud->webdata); | 2676 g_free(gfud->webdata); |
2663 gfud->webdata = NULL; | 2677 gfud->webdata = NULL; |
2672 return; | 2686 return; |
2673 } | 2687 } |
2674 | 2688 |
2675 gfud->newline = FALSE; | 2689 gfud->newline = FALSE; |
2676 } | 2690 } |
2691 else if (gfud->has_explicit_data_len && gfud->len == gfud->data_len) | |
2692 { | |
2693 got_eof = TRUE; | |
2694 } | |
2677 } | 2695 } |
2678 else if (errno != ETIMEDOUT) | 2696 else if (errno != ETIMEDOUT) |
2679 { | 2697 { |
2680 gfud->webdata = g_realloc(gfud->webdata, gfud->len + 1); | 2698 got_eof = TRUE; |
2681 gfud->webdata[gfud->len] = 0; | |
2682 | |
2683 gaim_debug_misc("gaim_url_fetch", "Received: '%s'\n", gfud->webdata); | |
2684 | |
2685 gaim_input_remove(gfud->inpa); | |
2686 close(sock); | |
2687 gfud->callback(gfud->user_data, gfud->webdata, gfud->len); | |
2688 | |
2689 destroy_fetch_url_data(gfud); | |
2690 } | 2699 } |
2691 else | 2700 else |
2692 { | 2701 { |
2693 gaim_input_remove(gfud->inpa); | 2702 gaim_input_remove(gfud->inpa); |
2694 close(sock); | 2703 close(sock); |
2695 | 2704 |
2696 gfud->callback(gfud->user_data, NULL, 0); | 2705 gfud->callback(gfud->user_data, NULL, 0); |
2706 | |
2707 destroy_fetch_url_data(gfud); | |
2708 } | |
2709 | |
2710 if (got_eof) { | |
2711 gfud->webdata = g_realloc(gfud->webdata, gfud->len + 1); | |
2712 gfud->webdata[gfud->len] = 0; | |
2713 | |
2714 gaim_debug_misc("gaim_url_fetch", "Received: '%s'\n", gfud->webdata); | |
2715 | |
2716 gaim_input_remove(gfud->inpa); | |
2717 close(sock); | |
2718 gfud->callback(gfud->user_data, gfud->webdata, gfud->len); | |
2697 | 2719 |
2698 destroy_fetch_url_data(gfud); | 2720 destroy_fetch_url_data(gfud); |
2699 } | 2721 } |
2700 } | 2722 } |
2701 | 2723 |
2705 void (*cb)(gpointer, const char *, size_t), | 2727 void (*cb)(gpointer, const char *, size_t), |
2706 void *user_data) | 2728 void *user_data) |
2707 { | 2729 { |
2708 int sock; | 2730 int sock; |
2709 GaimFetchUrlData *gfud; | 2731 GaimFetchUrlData *gfud; |
2732 | |
2733 gaim_debug_info("gaim_url_fetch", | |
2734 "requested to fetch (%s), full=%d, user_agent=(%s), http11=%d\n", | |
2735 url, full, user_agent, http11); | |
2710 | 2736 |
2711 g_return_if_fail(url != NULL); | 2737 g_return_if_fail(url != NULL); |
2712 g_return_if_fail(cb != NULL); | 2738 g_return_if_fail(cb != NULL); |
2713 | 2739 |
2714 gfud = g_new0(GaimFetchUrlData, 1); | 2740 gfud = g_new0(GaimFetchUrlData, 1); |