Mercurial > pidgin.yaz
annotate src/html.c @ 6160:7416a43dc786
[gaim-migrate @ 6635]
Hold it.. Nobody said anything about _three_ books. Oh that stinkin' wise
man. He was so busy filling me full of his secret little words and his
phrases and his BALONEY that he never said anything about this...
committer: Tailor Script <tailor@pidgin.im>
author | Christian Hammond <chipx86@chipx86.com> |
---|---|
date | Wed, 16 Jul 2003 22:43:27 +0000 |
parents | 3de23c9ca1e4 |
children | 64d952098596 |
rev | line source |
---|---|
1 | 1 /* |
2 * gaim | |
3 * | |
4 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net> | |
5176 | 5 * 2003, Nathan Walp <faceprint@faceprint.com> |
1 | 6 * |
7 * This program is free software; you can redistribute it and/or modify | |
8 * it under the terms of the GNU General Public License as published by | |
9 * the Free Software Foundation; either version 2 of the License, or | |
10 * (at your option) any later version. | |
11 * | |
12 * This program is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 * GNU General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU General Public License | |
18 * along with this program; if not, write to the Free Software | |
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 * | |
21 */ | |
5872
059d95c67cda
[gaim-migrate @ 6304]
Christian Hammond <chipx86@chipx86.com>
parents:
5681
diff
changeset
|
22 #include "internal.h" |
3630 | 23 |
5872
059d95c67cda
[gaim-migrate @ 6304]
Christian Hammond <chipx86@chipx86.com>
parents:
5681
diff
changeset
|
24 #include "debug.h" |
6115
11bedb793a44
[gaim-migrate @ 6578]
Christian Hammond <chipx86@chipx86.com>
parents:
5940
diff
changeset
|
25 #include "html.h" |
1092
a930439f29b1
[gaim-migrate @ 1102]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1088
diff
changeset
|
26 #include "proxy.h" |
1 | 27 |
5872
059d95c67cda
[gaim-migrate @ 6304]
Christian Hammond <chipx86@chipx86.com>
parents:
5681
diff
changeset
|
28 #include "gaim.h" |
3717
988485669631
[gaim-migrate @ 3850]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
3630
diff
changeset
|
29 |
4359
5fb47ec9bfe4
[gaim-migrate @ 4625]
Christian Hammond <chipx86@chipx86.com>
parents:
4335
diff
changeset
|
30 gchar *strip_html(const gchar *text) |
1 | 31 { |
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
32 int i, j, k; |
1 | 33 int visible = 1; |
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
34 gchar *text2 = g_strdup(text); |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
35 |
4757 | 36 if(!text) |
37 return NULL; | |
4503 | 38 |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
39 for (i = 0, j = 0; text2[i]; i++) { |
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
40 if (text2[i] == '<') { |
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
41 k = i + 1; |
4777 | 42 if(g_ascii_isspace(text2[k])) { |
43 visible = 1; | |
44 } else { | |
45 while (text2[k]) { | |
46 if (text2[k] == '<') { | |
47 visible = 1; | |
48 break; | |
49 } | |
50 if (text2[k] == '>') { | |
51 visible = 0; | |
52 break; | |
53 } | |
54 k++; | |
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
55 } |
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
56 } |
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
57 } else if (text2[i] == '>' && !visible) { |
1 | 58 visible = 1; |
59 continue; | |
60 } | |
4473 | 61 if (text2[i] == '&' && strncasecmp(text2+i,""",6) == 0) { |
62 text2[j++] = '\"'; | |
63 i = i+5; | |
64 continue; | |
65 } | |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
66 if (visible) { |
1 | 67 text2[j++] = text2[i]; |
68 } | |
69 } | |
70 text2[j] = '\0'; | |
71 return text2; | |
72 } | |
73 | |
3630 | 74 struct g_url *parse_url(char *url) |
1 | 75 { |
5512 | 76 struct g_url *test = g_new0(struct g_url, 1); |
1 | 77 char scan_info[255]; |
78 char port[5]; | |
79 int f; | |
5501
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
80 char* turl; |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
81 /* hyphen at end includes it in control set */ |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
82 char addr_ctrl[] = "A-Za-z0-9.-"; |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
83 char port_ctrl[] = "0-9"; |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
84 char page_ctrl[] = "A-Za-z0-9.~_/&%%?=+^-"; |
1 | 85 |
5501
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
86 if((turl=strstr(url, "http://")) || (turl=strstr(url, "HTTP://"))) |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
87 url=turl+=7; |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
88 |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
89 snprintf(scan_info, sizeof(scan_info), |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
90 "%%[%s]:%%[%s]/%%[%s]", |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
91 addr_ctrl, port_ctrl, page_ctrl); |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
92 |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
93 f = sscanf(url, scan_info, test->address, port, test->page); |
1 | 94 if (f == 1) { |
5501
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
95 snprintf(scan_info, sizeof(scan_info), |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
96 "%%[%s]/%%[%s]", |
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
97 addr_ctrl, page_ctrl); |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
98 f = sscanf(url, scan_info, test->address, test->page); |
5501
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
99 snprintf(port, sizeof(port), "80"); |
1 | 100 } |
5501
36d2c875a822
[gaim-migrate @ 5900]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
5211
diff
changeset
|
101 if (f == 1) |
5512 | 102 test->page[0] = '\0'; |
1 | 103 |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
104 sscanf(port, "%d", &test->port); |
1 | 105 return test; |
106 } | |
107 | |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
/* Per-request state carried through one grab_url() fetch. */
struct grab_url_data {
	/* Called as callback(data, webdata, len) when the fetch ends, or as
	 * callback(data, NULL, 0) when it fails. */
	void (* callback)(gpointer, char *, unsigned long);
	/* Caller-supplied pointer handed back as the callback's first argument. */
	gpointer data;
	/* Result of parse_url() on the requested URL. */
	struct g_url *website;
	/* Private copy (g_strdup) of the requested URL. */
	char *url;
	/* TRUE: the GET line uses url verbatim; FALSE: it uses "/" + website->page. */
	gboolean full;

	/* Handle from gaim_input_add(), later passed to gaim_input_remove(). */
	int inpa;

	/* Set once the GET request has been written to the socket. */
	gboolean sentreq;
	/* While reading headers: set when the previous line just ended, so a
	 * second '\n' marks the blank line that ends the headers. */
	gboolean newline;
	/* Set once the end of the headers has been seen. */
	gboolean startsaving;
	/* Buffer of received bytes (headers first, then reset for the body). */
	char *webdata;
	/* Number of bytes currently stored in webdata. */
	unsigned long len;
	/* Allocated size of webdata in bytes. */
	unsigned long data_len;
};
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
124 |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
125 static gboolean |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
126 parse_redirect(const char *data, size_t data_len, gint sock, |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
127 struct grab_url_data *gunk) |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
128 { |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
129 gchar *s; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
130 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
131 if ((s = g_strstr_len(data, data_len, "Location: ")) != NULL) { |
6128
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
132 gchar *new_url, *temp_url, *end; |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
133 gboolean full; |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
134 int len; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
135 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
136 s += strlen("Location: "); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
137 end = strchr(s, '\r'); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
138 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
139 /* Just in case :) */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
140 if (end == NULL) |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
141 end = strchr(s, '\n'); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
142 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
143 len = end - s; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
144 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
145 new_url = g_malloc(len + 1); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
146 strncpy(new_url, s, len); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
147 new_url[len] = '\0'; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
148 |
6128
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
149 full = gunk->full; |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
150 |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
151 if (*new_url == '/' || g_strstr_len(new_url, len, "://") == NULL) { |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
152 temp_url = new_url; |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
153 |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
154 new_url = g_strdup_printf("%s:%d%s", gunk->website->address, |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
155 gunk->website->port, temp_url); |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
156 |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
157 g_free(temp_url); |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
158 |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
159 full = FALSE; |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
160 } |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
161 |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
162 /* Close the existing stuff. */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
163 gaim_input_remove(gunk->inpa); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
164 close(sock); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
165 |
6128
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
166 gaim_debug(GAIM_DEBUG_INFO, "grab_url", |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
167 "Redirecting to %s\n", new_url); |
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
168 |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
169 /* Try again, with this new location. */ |
6128
3de23c9ca1e4
[gaim-migrate @ 6602]
Christian Hammond <chipx86@chipx86.com>
parents:
6115
diff
changeset
|
170 grab_url(new_url, full, gunk->callback, gunk->data); |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
171 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
172 /* Free up. */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
173 g_free(new_url); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
174 g_free(gunk->webdata); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
175 g_free(gunk->website); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
176 g_free(gunk->url); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
177 g_free(gunk); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
178 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
179 return TRUE; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
180 } |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
181 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
182 return FALSE; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
183 } |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
184 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
/**
 * Extract the value of the "Content-Length: " header from raw header bytes.
 *
 * The header name is matched case-sensitively and only at the start of a
 * line (start of @data or right after a '\n'). Parsing never reads beyond
 * @data_len, so @data does not need to be NUL-terminated.
 *
 * @param data      Received header bytes, may be NULL.
 * @param data_len  Number of valid bytes in @data.
 * @return          The parsed length, or 0 if the header is missing, has no
 *                  digits, or its value does not fit in a size_t.
 */
static size_t
parse_content_len(const char *data, size_t data_len)
{
	static const char header[] = "Content-Length: ";
	const size_t header_len = sizeof(header) - 1;
	const size_t max_len = (size_t)-1;
	size_t content_len = 0;
	size_t i;

	if (data == NULL || data_len < header_len)
		return 0;

	/* Find the header at the beginning of a line. */
	for (i = 0; i + header_len <= data_len; i++) {
		if ((i == 0 || data[i - 1] == '\n') &&
			memcmp(data + i, header, header_len) == 0)
			break;
	}
	if (i + header_len > data_len)
		return 0;

	/* Accumulate decimal digits, giving up if the value would overflow. */
	for (i += header_len;
		 i < data_len && data[i] >= '0' && data[i] <= '9';
		 i++) {
		size_t digit = (size_t)(data[i] - '0');

		if (content_len > (max_len - digit) / 10)
			return 0;
		content_len = content_len * 10 + digit;
	}

	return content_len;
}
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
194 |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
195 static void grab_url_callback(gpointer dat, gint sock, GaimInputCondition cond) |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
196 { |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
197 struct grab_url_data *gunk = dat; |
1 | 198 char data; |
199 | |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
200 if (sock == -1) { |
4322 | 201 gunk->callback(gunk->data, NULL, 0); |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
202 g_free(gunk->website); |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
203 g_free(gunk->url); |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
204 g_free(gunk); |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
205 return; |
1087
56c7ceb986a8
[gaim-migrate @ 1097]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
691
diff
changeset
|
206 } |
1 | 207 |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
208 if (!gunk->sentreq) { |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
209 char buf[256]; |
3717
988485669631
[gaim-migrate @ 3850]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
3630
diff
changeset
|
210 |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
211 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.0\r\n\r\n", gunk->full ? "" : "/", |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
212 gunk->full ? gunk->url : gunk->website->page); |
5211
0241d6b6702d
[gaim-migrate @ 5581]
Christian Hammond <chipx86@chipx86.com>
parents:
5176
diff
changeset
|
213 |
0241d6b6702d
[gaim-migrate @ 5581]
Christian Hammond <chipx86@chipx86.com>
parents:
5176
diff
changeset
|
214 gaim_debug(GAIM_DEBUG_MISC, "grab_url_callback", |
0241d6b6702d
[gaim-migrate @ 5581]
Christian Hammond <chipx86@chipx86.com>
parents:
5176
diff
changeset
|
215 "Request: %s\n", buf); |
3717
988485669631
[gaim-migrate @ 3850]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
3630
diff
changeset
|
216 |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
217 write(sock, buf, strlen(buf)); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
218 fcntl(sock, F_SETFL, O_NONBLOCK); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
219 gunk->sentreq = TRUE; |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
220 gunk->inpa = gaim_input_add(sock, GAIM_INPUT_READ, grab_url_callback, dat); |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
221 gunk->data_len = 4096; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
222 gunk->webdata = g_malloc(gunk->data_len); |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
223 return; |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
224 } |
1 | 225 |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
226 if (read(sock, &data, 1) > 0 || errno == EWOULDBLOCK) { |
278
29e1669b006b
[gaim-migrate @ 288]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
268
diff
changeset
|
227 if (errno == EWOULDBLOCK) { |
29e1669b006b
[gaim-migrate @ 288]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
268
diff
changeset
|
228 errno = 0; |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
229 return; |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
230 } |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
231 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
232 gunk->len++; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
233 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
234 if (gunk->len == gunk->data_len + 1) { |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
235 gunk->data_len += (gunk->data_len) / 2; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
236 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
237 gunk->webdata = g_realloc(gunk->webdata, gunk->data_len); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
238 } |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
239 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
240 gunk->webdata[gunk->len - 1] = data; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
241 |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
242 if (!gunk->startsaving) { |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
243 if (data == '\r') |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
244 return; |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
245 if (data == '\n') { |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
246 if (gunk->newline) { |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
247 size_t content_len; |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
248 gunk->startsaving = TRUE; |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
249 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
250 /* See if we can find a redirect. */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
251 if (parse_redirect(gunk->webdata, gunk->len, sock, gunk)) |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
252 return; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
253 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
254 /* No redirect. See if we can find a content length. */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
255 content_len = parse_content_len(gunk->webdata, gunk->len); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
256 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
257 if (content_len == 0) { |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
258 /* We'll stick with an initial 8192 */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
259 content_len = 8192; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
260 } |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
261 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
262 /* Out with the old... */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
263 gunk->len = 0; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
264 g_free(gunk->webdata); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
265 gunk->webdata = NULL; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
266 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
267 /* In with the new. */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
268 gunk->data_len = content_len; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
269 gunk->webdata = g_malloc(gunk->data_len); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
270 } |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
271 else |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
272 gunk->newline = TRUE; |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
273 return; |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
274 } |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
275 gunk->newline = FALSE; |
278
29e1669b006b
[gaim-migrate @ 288]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
268
diff
changeset
|
276 } |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
277 } else if (errno != ETIMEDOUT) { |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
278 gunk->webdata = g_realloc(gunk->webdata, gunk->len + 1); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
279 gunk->webdata[gunk->len] = 0; |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
280 |
5211
0241d6b6702d
[gaim-migrate @ 5581]
Christian Hammond <chipx86@chipx86.com>
parents:
5176
diff
changeset
|
281 gaim_debug(GAIM_DEBUG_MISC, "grab_url_callback", |
0241d6b6702d
[gaim-migrate @ 5581]
Christian Hammond <chipx86@chipx86.com>
parents:
5176
diff
changeset
|
282 "Received: '%s'\n", gunk->webdata); |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
283 |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
284 gaim_input_remove(gunk->inpa); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
285 close(sock); |
4322 | 286 gunk->callback(gunk->data, gunk->webdata, gunk->len); |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
287 if (gunk->webdata) |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
288 g_free(gunk->webdata); |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
289 g_free(gunk->website); |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
290 g_free(gunk->url); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
291 g_free(gunk); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
292 } else { |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
293 gaim_input_remove(gunk->inpa); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
294 close(sock); |
4322 | 295 gunk->callback(gunk->data, NULL, 0); |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
296 if (gunk->webdata) |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
297 g_free(gunk->webdata); |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
298 g_free(gunk->website); |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
299 g_free(gunk->url); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
300 g_free(gunk); |
1 | 301 } |
302 } | |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
303 |
4322 | 304 void grab_url(char *url, gboolean full, void callback(gpointer, char *, unsigned long), gpointer data) |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
305 { |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
306 int sock; |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
307 struct grab_url_data *gunk = g_new0(struct grab_url_data, 1); |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
308 |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
309 gunk->callback = callback; |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
310 gunk->data = data; |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
311 gunk->url = g_strdup(url); |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
312 gunk->website = parse_url(url); |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
313 gunk->full = full; |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
314 |
5681
46d7ad0dfa26
[gaim-migrate @ 6100]
Christian Hammond <chipx86@chipx86.com>
parents:
5512
diff
changeset
|
315 if ((sock = gaim_proxy_connect(NULL, gunk->website->address, |
46d7ad0dfa26
[gaim-migrate @ 6100]
Christian Hammond <chipx86@chipx86.com>
parents:
5512
diff
changeset
|
316 gunk->website->port, grab_url_callback, |
46d7ad0dfa26
[gaim-migrate @ 6100]
Christian Hammond <chipx86@chipx86.com>
parents:
5512
diff
changeset
|
317 gunk)) < 0) { |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
318 g_free(gunk->website); |
1881
a02584b98823
[gaim-migrate @ 1891]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1843
diff
changeset
|
319 g_free(gunk->url); |
a02584b98823
[gaim-migrate @ 1891]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1843
diff
changeset
|
320 g_free(gunk); |
4322 | 321 callback(data, g_strdup(_("g003: Error opening connection.\n")), 0); |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
322 } |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
323 } |
5093 | 324 |
/*
 * One entry on the stack of currently open tags kept by html_to_xhtml().
 *
 * Both members point at string literals, so they are never freed and must
 * never be written through; only the record itself is heap-allocated.
 */
struct gaim_parse_tag {
	const char *src_tag;	/* tag name as it appears in the incoming HTML */
	const char *dest_tag;	/* tag name written to the XHTML output */
};
329 | |
/*
 * ALLOW_TAG_ALT(x, y) -- let the HTML tag named by the string literal x
 * through to the XHTML output, renamed to the string literal y.
 *
 * The macro is written to be expanded directly inside the main loop of
 * html_to_xhtml(), with `c` pointing at a '<'.  It uses these names from
 * the expansion site: the input cursor `c`, the GStrings `xhtml` and
 * `plain`, and the open-tag list `tags`.  Whenever it consumes input it
 * ends with `continue`, so it must sit immediately inside that loop.
 *
 * Two forms are recognised:
 *
 *  1. "<x " followed by attributes.  The text up to the closing '>' is
 *     collected in `innards`; quoted attribute values are re-emitted with
 *     their content passed through g_markup_escape_text().  If no '>' is
 *     found, or another '<' shows up first outside quotes, the tag is
 *     rejected: the '<' is written to `xhtml` as "&lt;" and to `plain`
 *     as a literal '<', and the cursor advances by one character.
 *
 *  2. "<x>" or "<x/>" with no attributes.
 *
 * Unless the tag is self-closing ('/' right before the '>'), a
 * gaim_parse_tag record is pushed on `tags` so that the matching closing
 * tag can be translated later.
 *
 * ALLOW_TAG(x) is the same thing with the tag name left unchanged.
 */
#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
						const char *o = c + strlen("<" x); \
						const char *p = NULL, *q = NULL, *r = NULL; \
						GString *innards = g_string_new(""); \
						while(*o) { \
							if(!q && (*o == '\"' || *o == '\'') ) { \
								/* opening quote of an attribute value */ \
								q = o; \
							} else if(q) { \
								if(*o == *q) { \
									/* matching closing quote: emit the escaped value */ \
									char *unescaped = g_strndup(q+1, o-q-1); \
									char *escaped = g_markup_escape_text(unescaped, -1); \
									g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
									g_free(unescaped); \
									g_free(escaped); \
									q = NULL; \
								} \
							} else if(*o == '<') { \
								r = o; \
							} else if(*o == '>') { \
								p = o; \
								break; \
							} else { \
								innards = g_string_append_c(innards, *o); \
							} \
							o++; \
						} \
						if(p && !r) { \
							if(*(p-1) != '/') { \
								struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \
								pt->src_tag = x; \
								pt->dest_tag = y; \
								tags = g_list_prepend(tags, pt); \
							} \
							xhtml = g_string_append(xhtml, "<" y); \
							xhtml = g_string_append(xhtml, innards->str); \
							xhtml = g_string_append_c(xhtml, '>'); \
							c = p + 1; \
						} else { \
							/* not a well-formed tag: treat the '<' as text */ \
							xhtml = g_string_append(xhtml, "&lt;"); \
							plain = g_string_append_c(plain, '<'); \
							c++; \
						} \
						g_string_free(innards, TRUE); \
						continue; \
					} \
						if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
								(*(c+strlen("<" x)) == '>' || \
								 !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
							xhtml = g_string_append(xhtml, "<" y); \
							c += strlen("<" x); \
							if(*c != '/') { \
								struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \
								pt->src_tag = x; \
								pt->dest_tag = y; \
								tags = g_list_prepend(tags, pt); \
								xhtml = g_string_append_c(xhtml, '>'); \
							} else { \
								xhtml = g_string_append(xhtml, "/>");\
							} \
							c = strchr(c, '>') + 1; \
							continue; \
						}
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
396 | |
5110 | 397 void html_to_xhtml(const char *html, char **xhtml_out, char **plain_out) { |
5093 | 398 GString *xhtml = g_string_new(""); |
5110 | 399 GString *plain = g_string_new(""); |
5093 | 400 GList *tags = NULL, *tag; |
5141 | 401 const char *c = html; |
5176 | 402 |
403 while(c && *c) { | |
5141 | 404 if(*c == '<') { |
5093 | 405 if(*(c+1) == '/') { /* closing tag */ |
406 tag = tags; | |
407 while(tag) { | |
5104 | 408 struct gaim_parse_tag *pt = tag->data; |
409 if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') { | |
410 c += strlen(pt->src_tag) + 3; | |
5093 | 411 break; |
412 } | |
413 tag = tag->next; | |
414 } | |
415 if(tag) { | |
416 while(tags) { | |
5104 | 417 struct gaim_parse_tag *pt = tags->data; |
418 g_string_append_printf(xhtml, "</%s>", pt->dest_tag); | |
5093 | 419 if(tags == tag) |
420 break; | |
5104 | 421 tags = g_list_remove(tags, pt); |
422 g_free(pt); | |
5093 | 423 } |
5104 | 424 g_free(tag->data); |
5093 | 425 tags = g_list_remove(tags, tag->data); |
426 } else { | |
427 /* we tried to close a tag we never opened! escape it | |
428 * and move on */ | |
429 xhtml = g_string_append(xhtml, "<"); | |
5110 | 430 plain = g_string_append_c(plain, '<'); |
5093 | 431 c++; |
432 } | |
433 } else { /* opening tag */ | |
434 ALLOW_TAG("a"); | |
5101 | 435 ALLOW_TAG_ALT("b", "strong"); |
5093 | 436 ALLOW_TAG("blockquote"); |
5101 | 437 ALLOW_TAG_ALT("bold", "strong"); |
5093 | 438 ALLOW_TAG("cite"); |
439 ALLOW_TAG("div"); | |
440 ALLOW_TAG("em"); | |
441 ALLOW_TAG("h1"); | |
442 ALLOW_TAG("h2"); | |
443 ALLOW_TAG("h3"); | |
444 ALLOW_TAG("h4"); | |
445 ALLOW_TAG("h5"); | |
446 ALLOW_TAG("h6"); | |
447 ALLOW_TAG("html"); | |
5101 | 448 ALLOW_TAG_ALT("i", "em"); |
449 ALLOW_TAG_ALT("italic", "em"); | |
5093 | 450 ALLOW_TAG("li"); |
451 ALLOW_TAG("ol"); | |
452 ALLOW_TAG("p"); | |
453 ALLOW_TAG("pre"); | |
454 ALLOW_TAG("q"); | |
455 ALLOW_TAG("span"); | |
456 ALLOW_TAG("strong"); | |
457 ALLOW_TAG("ul"); | |
458 | |
5174 | 459 /* we skip <HR> because it's not legal in XHTML-IM. However, |
460 * we still want to send something sensible, so we put a | |
461 * linebreak in its place. <BR> also needs special handling | |
462 * because putting a </BR> to close it would just be dumb. */ | |
463 if((!g_ascii_strncasecmp(c, "<br", 3) | |
464 || !g_ascii_strncasecmp(c, "<hr", 3)) | |
465 && (*(c+3) == '>' || | |
466 !g_ascii_strncasecmp(c+3, "/>", 2) || | |
467 !g_ascii_strncasecmp(c+3, " />", 3))) { | |
468 c = strchr(c, '>') + 1; | |
469 xhtml = g_string_append(xhtml, "<br/>"); | |
470 if(*c != '\n') | |
471 plain = g_string_append_c(plain, '\n'); | |
472 continue; | |
473 } | |
474 if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) { | |
5104 | 475 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); |
476 pt->src_tag = *(c+2) == '>' ? "u" : "underline"; | |
477 pt->dest_tag = "span"; | |
478 tags = g_list_prepend(tags, pt); | |
479 c = strchr(c, '>') + 1; | |
480 xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>"); | |
481 continue; | |
482 } | |
5174 | 483 if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) { |
5104 | 484 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); |
485 pt->src_tag = *(c+2) == '>' ? "s" : "strike"; | |
486 pt->dest_tag = "span"; | |
487 tags = g_list_prepend(tags, pt); | |
488 c = strchr(c, '>') + 1; | |
489 xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>"); | |
490 continue; | |
491 } | |
492 if(!g_ascii_strncasecmp(c, "<sub>", 5)) { | |
493 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
494 pt->src_tag = "sub"; | |
495 pt->dest_tag = "span"; | |
496 tags = g_list_prepend(tags, pt); | |
497 c = strchr(c, '>') + 1; | |
498 xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>"); | |
499 continue; | |
500 } | |
501 if(!g_ascii_strncasecmp(c, "<sup>", 5)) { | |
502 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
503 pt->src_tag = "sup"; | |
504 pt->dest_tag = "span"; | |
505 tags = g_list_prepend(tags, pt); | |
506 c = strchr(c, '>') + 1; | |
507 xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>"); | |
508 continue; | |
509 } | |
5107 | 510 if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) { |
511 const char *p = c; | |
512 GString *style = g_string_new(""); | |
513 struct gaim_parse_tag *pt; | |
514 while(*p && *p != '>') { | |
515 if(!g_ascii_strncasecmp(p, "color=", strlen("color="))) { | |
516 const char *q = p + strlen("color="); | |
517 GString *color = g_string_new(""); | |
518 if(*q == '\'' || *q == '\"') | |
519 q++; | |
520 while(*q && *q != '\"' && *q != '\'' && *q != ' ') { | |
521 color = g_string_append_c(color, *q); | |
522 q++; | |
523 } | |
524 g_string_append_printf(style, "color: %s; ", color->str); | |
525 g_string_free(color, TRUE); | |
526 p = q; | |
527 } else if(!g_ascii_strncasecmp(p, "face=", strlen("face="))) { | |
528 const char *q = p + strlen("face="); | |
529 gboolean space_allowed = FALSE; | |
530 GString *face = g_string_new(""); | |
531 if(*q == '\'' || *q == '\"') { | |
532 space_allowed = TRUE; | |
533 q++; | |
534 } | |
535 while(*q && *q != '\"' && *q != '\'' && (space_allowed || *q != ' ')) { | |
536 face = g_string_append_c(face, *q); | |
537 q++; | |
538 } | |
539 g_string_append_printf(style, "font-family: %s; ", face->str); | |
540 g_string_free(face, TRUE); | |
541 p = q; | |
542 } else if(!g_ascii_strncasecmp(p, "size=", strlen("size="))) { | |
543 const char *q = p + strlen("size="); | |
544 int sz; | |
545 const char *size = "medium"; | |
546 if(*q == '\'' || *q == '\"') | |
547 q++; | |
548 sz = atoi(q); | |
549 if(sz < 3) | |
550 size = "smaller"; | |
551 else if(sz > 3) | |
552 size = "larger"; | |
553 g_string_append_printf(style, "font-size: %s; ", size); | |
554 p = q; | |
555 } | |
556 p++; | |
557 } | |
558 c = strchr(c, '>') + 1; | |
559 pt = g_new0(struct gaim_parse_tag, 1); | |
560 pt->src_tag = "font"; | |
561 pt->dest_tag = "span"; | |
562 tags = g_list_prepend(tags, pt); | |
563 xhtml = g_string_append(xhtml, "<span"); | |
564 if(style->len) | |
565 g_string_append_printf(xhtml, " style='%s'", style->str); | |
566 xhtml = g_string_append_c(xhtml, '>'); | |
567 g_string_free(style, TRUE); | |
568 continue; | |
569 } | |
570 if(!g_ascii_strncasecmp(c, "<body ", 6)) { | |
571 const char *p = c; | |
572 gboolean did_something = FALSE; | |
573 while(*p && *p != '>') { | |
574 if(!g_ascii_strncasecmp(p, "bgcolor=", strlen("bgcolor="))) { | |
575 const char *q = p + strlen("bgcolor="); | |
576 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
577 GString *color = g_string_new(""); | |
578 if(*q == '\'' || *q == '\"') | |
579 q++; | |
580 while(*q && *q != '\"' && *q != '\'' && *q != ' ') { | |
581 color = g_string_append_c(color, *q); | |
582 q++; | |
583 } | |
584 g_string_append_printf(xhtml, "<span style='background: %s;'>", color->str); | |
585 g_string_free(color, TRUE); | |
586 c = strchr(c, '>') + 1; | |
587 pt->src_tag = "body"; | |
588 pt->dest_tag = "span"; | |
589 tags = g_list_prepend(tags, pt); | |
590 did_something = TRUE; | |
591 break; | |
592 } | |
593 p++; | |
594 } | |
595 if(did_something) continue; | |
596 } | |
597 /* this has to come after the special case for bgcolor */ | |
598 ALLOW_TAG("body"); | |
5093 | 599 if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) { |
600 char *p = strstr(c + strlen("<!--"), "-->"); | |
601 if(p) { | |
602 xhtml = g_string_append(xhtml, "<!--"); | |
603 c += strlen("<!--"); | |
604 continue; | |
605 } | |
606 } | |
607 | |
608 xhtml = g_string_append(xhtml, "<"); | |
5110 | 609 plain = g_string_append_c(plain, '<'); |
5093 | 610 c++; |
611 } | |
612 } else { | |
613 xhtml = g_string_append_c(xhtml, *c); | |
5110 | 614 plain = g_string_append_c(plain, *c); |
5093 | 615 c++; |
616 } | |
617 } | |
618 tag = tags; | |
619 while(tag) { | |
620 g_string_append_printf(xhtml, "</%s>", (char *)tag->data); | |
621 tag = tag->next; | |
622 } | |
623 g_list_free(tags); | |
5110 | 624 if(xhtml_out) |
625 *xhtml_out = g_strdup(xhtml->str); | |
626 if(plain_out) | |
627 *plain_out = g_strdup(plain->str); | |
5093 | 628 g_string_free(xhtml, TRUE); |
5110 | 629 g_string_free(plain, TRUE); |
5093 | 630 } |