comparison src/util.c @ 9240:f1d87ab17e41

[gaim-migrate @ 10039] wing writes: While working on patch 967849, I found that there are a few problems with the current Content-Length handling:
1. parse_content_len uses a single sscanf on data. However, data (the response headers) would begin with the HTTP status line, not the Content-Length header.
2. data is not NUL-terminated, but sscanf assumes a NUL-terminated string. This could (though very unlikely) be unpredictable if the input is malformed.
3. If gaim_url_fetch succeeds in extracting the Content-Length from the response headers, it will silently discard all the data after they have been read, because it does not keep track of whether the specified number of bytes has been read. (The "else" case that corresponds to this situation is an empty block.)
The attached patch corrects the above problems, though the patch is still technically not correct, since Content-Length theoretically should be matched case-insensitively.
He then revised the patch and wrote: More problems, unrelated to Content-Length handling, have been discovered:
1. Gaim's HTTP/1.1 support is broken; HTTP/1.1 clients are _required_ to be able to decode the "chunked" transfer encoding, but Gaim cannot do it. Since the only difference between Gaim's HTTP/1.0 and HTTP/1.1 support is that the latter sends the Host header, which is not forbidden in HTTP/1.0, I have changed Gaim's HTTP/1.0 behaviour to also send the Host header. This is required for Yahoo Japan "avatar" support. (The server requires a Host header but sends the image in the chunked encoding, which Gaim cannot decode.)
2. User-Agent should not be quoted.
committer: Tailor Script <tailor@pidgin.im>
author Tim Ringenbach <marv@pidgin.im>
date Wed, 09 Jun 2004 01:24:47 +0000
parents 0c352d0e4ddc
children 5e9018c81bd9
comparison
equal deleted inserted replaced
9239:3434d420fe69 9240:f1d87ab17e41
55 int inpa; 55 int inpa;
56 56
57 gboolean sentreq; 57 gboolean sentreq;
58 gboolean newline; 58 gboolean newline;
59 gboolean startsaving; 59 gboolean startsaving;
60 gboolean has_explicit_data_len;
60 char *webdata; 61 char *webdata;
61 unsigned long len; 62 unsigned long len;
62 unsigned long data_len; 63 unsigned long data_len;
63 64
64 } GaimFetchUrlData; 65 } GaimFetchUrlData;
2530 2531
2531 static size_t 2532 static size_t
2532 parse_content_len(const char *data, size_t data_len) 2533 parse_content_len(const char *data, size_t data_len)
2533 { 2534 {
2534 size_t content_len = 0; 2535 size_t content_len = 0;
2535 2536 const char *p = NULL;
2536 sscanf(data, "Content-Length: %d", (int *)&content_len); 2537
2538 /* This is still technically wrong, since headers are case-insensitive
2539 * [RFC 2616, section 4.2], though this ought to catch the normal case.
2540 * Note: data is _not_ nul-terminated.
2541 */
2542 if (data_len > 16) {
2543 p = strncmp(data, "Content-Length: ", 16) == 0? data: NULL;
2544 if (!p) {
2545 p = g_strstr_len(data, data_len, "\nContent-Length: ");
2546 if (p)
2547 p += 1;
2548 }
2549 }
2550
2551 /* If we can find a Content-Length header at all, try to sscanf it.
2552 * Response headers should end with at least \r\n, so sscanf is safe,
2553 * if we make sure that there is indeed a \n in our header.
2554 */
2555 if (p && g_strstr_len(p, data_len - (p - data), "\n")) {
2556 sscanf(p, "Content-Length: %d", (int *)&content_len);
2557 gaim_debug_misc("parse_content_len", "parsed %d\n", content_len);
2558 }
2537 2559
2538 return content_len; 2560 return content_len;
2539 } 2561 }
2540 2562
2541 static void 2563 static void
2542 url_fetched_cb(gpointer url_data, gint sock, GaimInputCondition cond) 2564 url_fetched_cb(gpointer url_data, gint sock, GaimInputCondition cond)
2543 { 2565 {
2544 GaimFetchUrlData *gfud = url_data; 2566 GaimFetchUrlData *gfud = url_data;
2545 char data; 2567 char data;
2568 gboolean got_eof = FALSE;
2546 2569
2547 if (sock == -1) 2570 if (sock == -1)
2548 { 2571 {
2549 gfud->callback(gfud->user_data, NULL, 0); 2572 gfud->callback(gfud->user_data, NULL, 0);
2550 2573
2557 { 2580 {
2558 char buf[1024]; 2581 char buf[1024];
2559 2582
2560 if (gfud->user_agent) 2583 if (gfud->user_agent)
2561 { 2584 {
2562 if (gfud->http11) 2585 /* Host header is not forbidden in HTTP/1.0 requests, and HTTP/1.1
2563 { 2586 * clients must know how to handle the "chunked" transfer encoding.
2564 g_snprintf(buf, sizeof(buf), 2587 * Gaim doesn't know how to handle "chunked", so should always send
2565 "GET %s%s HTTP/1.1\r\n" 2588 * the Host header regardless, to get around some observed problems
2566 "User-Agent: \"%s\"\r\n" 2589 */
2567 "Host: %s\r\n\r\n", 2590 g_snprintf(buf, sizeof(buf),
2568 (gfud->full ? "" : "/"), 2591 "GET %s%s HTTP/%s\r\n"
2569 (gfud->full ? gfud->url : gfud->website.page), 2592 "User-Agent: %s\r\n"
2570 gfud->user_agent, gfud->website.address); 2593 "Host: %s\r\n\r\n",
2571 } 2594 (gfud->full ? "" : "/"),
2572 else 2595 (gfud->full ? gfud->url : gfud->website.page),
2573 { 2596 (gfud->http11 ? "1.1" : "1.0"),
2574 g_snprintf(buf, sizeof(buf), 2597 gfud->user_agent, gfud->website.address);
2575 "GET %s%s HTTP/1.0\r\n"
2576 "User-Agent: \"%s\"\r\n\r\n",
2577 (gfud->full ? "" : "/"),
2578 (gfud->full ? gfud->url : gfud->website.page),
2579 gfud->user_agent);
2580 }
2581 } 2598 }
2582 else 2599 else
2583 { 2600 {
2584 if (gfud->http11) 2601 g_snprintf(buf, sizeof(buf),
2585 { 2602 "GET %s%s HTTP/%s\r\n"
2586 g_snprintf(buf, sizeof(buf), 2603 "Host: %s\r\n\r\n",
2587 "GET %s%s HTTP/1.1\r\n" 2604 (gfud->full ? "" : "/"),
2588 "Host: %s\r\n\r\n", 2605 (gfud->full ? gfud->url : gfud->website.page),
2589 (gfud->full ? "" : "/"), 2606 (gfud->http11 ? "1.1" : "1.0"),
2590 (gfud->full ? gfud->url : gfud->website.page), 2607 gfud->website.address);
2591 gfud->website.address);
2592 }
2593 else
2594 {
2595 g_snprintf(buf, sizeof(buf),
2596 "GET %s%s HTTP/1.0\r\n\r\n",
2597 (gfud->full ? "" : "/"),
2598 (gfud->full ? gfud->url : gfud->website.page));
2599 }
2600 } 2608 }
2601 2609
2602 gaim_debug_misc("gaim_url_fetch", "Request: %s\n", buf); 2610 gaim_debug_misc("gaim_url_fetch", "Request: %s\n", buf);
2603 2611
2604 write(sock, buf, strlen(buf)); 2612 write(sock, buf, strlen(buf));
2642 if (gfud->newline) 2650 if (gfud->newline)
2643 { 2651 {
2644 size_t content_len; 2652 size_t content_len;
2645 gfud->startsaving = TRUE; 2653 gfud->startsaving = TRUE;
2646 2654
2655 gaim_debug_misc("gaim_url_fetch", "Response headers: '%*.*s'\n", gfud->len, gfud->len, gfud->webdata);
2656
2647 /* See if we can find a redirect. */ 2657 /* See if we can find a redirect. */
2648 if (parse_redirect(gfud->webdata, gfud->len, sock, gfud)) 2658 if (parse_redirect(gfud->webdata, gfud->len, sock, gfud))
2649 return; 2659 return;
2650 2660
2651 /* No redirect. See if we can find a content length. */ 2661 /* No redirect. See if we can find a content length. */
2654 if (content_len == 0) 2664 if (content_len == 0)
2655 { 2665 {
2656 /* We'll stick with an initial 8192 */ 2666 /* We'll stick with an initial 8192 */
2657 content_len = 8192; 2667 content_len = 8192;
2658 } 2668 }
2669 else
2670 {
2671 gfud->has_explicit_data_len = TRUE;
2672 }
2659 2673
2660 /* Out with the old... */ 2674 /* Out with the old... */
2661 gfud->len = 0; 2675 gfud->len = 0;
2662 g_free(gfud->webdata); 2676 g_free(gfud->webdata);
2663 gfud->webdata = NULL; 2677 gfud->webdata = NULL;
2672 return; 2686 return;
2673 } 2687 }
2674 2688
2675 gfud->newline = FALSE; 2689 gfud->newline = FALSE;
2676 } 2690 }
2691 else if (gfud->has_explicit_data_len && gfud->len == gfud->data_len)
2692 {
2693 got_eof = TRUE;
2694 }
2677 } 2695 }
2678 else if (errno != ETIMEDOUT) 2696 else if (errno != ETIMEDOUT)
2679 { 2697 {
2680 gfud->webdata = g_realloc(gfud->webdata, gfud->len + 1); 2698 got_eof = TRUE;
2681 gfud->webdata[gfud->len] = 0;
2682
2683 gaim_debug_misc("gaim_url_fetch", "Received: '%s'\n", gfud->webdata);
2684
2685 gaim_input_remove(gfud->inpa);
2686 close(sock);
2687 gfud->callback(gfud->user_data, gfud->webdata, gfud->len);
2688
2689 destroy_fetch_url_data(gfud);
2690 } 2699 }
2691 else 2700 else
2692 { 2701 {
2693 gaim_input_remove(gfud->inpa); 2702 gaim_input_remove(gfud->inpa);
2694 close(sock); 2703 close(sock);
2695 2704
2696 gfud->callback(gfud->user_data, NULL, 0); 2705 gfud->callback(gfud->user_data, NULL, 0);
2706
2707 destroy_fetch_url_data(gfud);
2708 }
2709
2710 if (got_eof) {
2711 gfud->webdata = g_realloc(gfud->webdata, gfud->len + 1);
2712 gfud->webdata[gfud->len] = 0;
2713
2714 gaim_debug_misc("gaim_url_fetch", "Received: '%s'\n", gfud->webdata);
2715
2716 gaim_input_remove(gfud->inpa);
2717 close(sock);
2718 gfud->callback(gfud->user_data, gfud->webdata, gfud->len);
2697 2719
2698 destroy_fetch_url_data(gfud); 2720 destroy_fetch_url_data(gfud);
2699 } 2721 }
2700 } 2722 }
2701 2723
2705 void (*cb)(gpointer, const char *, size_t), 2727 void (*cb)(gpointer, const char *, size_t),
2706 void *user_data) 2728 void *user_data)
2707 { 2729 {
2708 int sock; 2730 int sock;
2709 GaimFetchUrlData *gfud; 2731 GaimFetchUrlData *gfud;
2732
2733 gaim_debug_info("gaim_url_fetch",
2734 "requested to fetch (%s), full=%d, user_agent=(%s), http11=%d\n",
2735 url, full, user_agent, http11);
2710 2736
2711 g_return_if_fail(url != NULL); 2737 g_return_if_fail(url != NULL);
2712 g_return_if_fail(cb != NULL); 2738 g_return_if_fail(cb != NULL);
2713 2739
2714 gfud = g_new0(GaimFetchUrlData, 1); 2740 gfud = g_new0(GaimFetchUrlData, 1);