Mercurial > pidgin
annotate src/html.c @ 5216:00bd3019749e
[gaim-migrate @ 5586]
debug_printf -> gaim_debug
committer: Tailor Script <tailor@pidgin.im>
author | Christian Hammond <chipx86@chipx86.com> |
---|---|
date | Sat, 26 Apr 2003 07:55:04 +0000 |
parents | 0241d6b6702d |
children | 36d2c875a822 |
rev | line source |
---|---|
1 | 1 /* |
2 * gaim | |
3 * | |
4 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net> | |
5176 | 5 * 2003, Nathan Walp <faceprint@faceprint.com> |
1 | 6 * |
7 * This program is free software; you can redistribute it and/or modify | |
8 * it under the terms of the GNU General Public License as published by | |
9 * the Free Software Foundation; either version 2 of the License, or | |
10 * (at your option) any later version. | |
11 * | |
12 * This program is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 * GNU General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU General Public License | |
18 * along with this program; if not, write to the Free Software | |
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 * | |
21 */ | |
22 | |
349
b402a23f35df
[gaim-migrate @ 359]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
278
diff
changeset
|
23 #ifdef HAVE_CONFIG_H |
2090
b66aca8e8dce
[gaim-migrate @ 2100]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2060
diff
changeset
|
24 #include <config.h> |
349
b402a23f35df
[gaim-migrate @ 359]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
278
diff
changeset
|
25 #endif |
1 | 26 #include <string.h> |
27 #include <stdio.h> | |
28 #include <stdlib.h> | |
3630 | 29 |
30 #ifndef _WIN32 | |
1 | 31 #include <sys/time.h> |
32 #include <unistd.h> | |
33 #include <sys/socket.h> | |
34 #include <netdb.h> | |
35 #include <netinet/in.h> | |
3630 | 36 #endif |
37 | |
38 #include <sys/types.h> | |
278
29e1669b006b
[gaim-migrate @ 288]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
268
diff
changeset
|
39 #include <fcntl.h> |
29e1669b006b
[gaim-migrate @ 288]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
268
diff
changeset
|
40 #include <errno.h> |
3630 | 41 #include "gaim.h" |
1092
a930439f29b1
[gaim-migrate @ 1102]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1088
diff
changeset
|
42 #include "proxy.h" |
1 | 43 |
3717
988485669631
[gaim-migrate @ 3850]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
3630
diff
changeset
|
44 #ifdef _WIN32 |
988485669631
[gaim-migrate @ 3850]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
3630
diff
changeset
|
45 #include "win32dep.h" |
988485669631
[gaim-migrate @ 3850]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
3630
diff
changeset
|
46 #endif |
988485669631
[gaim-migrate @ 3850]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
3630
diff
changeset
|
47 |
4359
5fb47ec9bfe4
[gaim-migrate @ 4625]
Christian Hammond <chipx86@chipx86.com>
parents:
4335
diff
changeset
|
48 gchar *strip_html(const gchar *text) |
1 | 49 { |
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
50 int i, j, k; |
1 | 51 int visible = 1; |
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
52 gchar *text2 = g_strdup(text); |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
53 |
4757 | 54 if(!text) |
55 return NULL; | |
4503 | 56 |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
57 for (i = 0, j = 0; text2[i]; i++) { |
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
58 if (text2[i] == '<') { |
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
59 k = i + 1; |
4777 | 60 if(g_ascii_isspace(text2[k])) { |
61 visible = 1; | |
62 } else { | |
63 while (text2[k]) { | |
64 if (text2[k] == '<') { | |
65 visible = 1; | |
66 break; | |
67 } | |
68 if (text2[k] == '>') { | |
69 visible = 0; | |
70 break; | |
71 } | |
72 k++; | |
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
73 } |
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
74 } |
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
75 } else if (text2[i] == '>' && !visible) { |
1 | 76 visible = 1; |
77 continue; | |
78 } | |
4473 | 79 if (text2[i] == '&' && strncasecmp(text2+i,""",6) == 0) { |
80 text2[j++] = '\"'; | |
81 i = i+5; | |
82 continue; | |
83 } | |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
84 if (visible) { |
1 | 85 text2[j++] = text2[i]; |
86 } | |
87 } | |
88 text2[j] = '\0'; | |
89 return text2; | |
90 } | |
91 | |
3630 | 92 struct g_url *parse_url(char *url) |
1 | 93 { |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
94 struct g_url *test = g_new0(struct g_url, 1); |
1 | 95 char scan_info[255]; |
96 char port[5]; | |
97 int f; | |
98 | |
99 if (strstr(url, "http://")) | |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
100 g_snprintf(scan_info, sizeof(scan_info), |
2060 | 101 "http://%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?=+]"); |
1 | 102 else |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
103 g_snprintf(scan_info, sizeof(scan_info), |
2060 | 104 "%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?=+^]"); |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
105 f = sscanf(url, scan_info, test->address, port, test->page); |
1 | 106 if (f == 1) { |
107 if (strstr(url, "http://")) | |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
108 g_snprintf(scan_info, sizeof(scan_info), |
2060 | 109 "http://%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?=+^]"); |
1 | 110 else |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
111 g_snprintf(scan_info, sizeof(scan_info), |
2060 | 112 "%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?=+^]"); |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
113 f = sscanf(url, scan_info, test->address, test->page); |
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
114 g_snprintf(port, sizeof(test->port), "80"); |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
115 port[2] = 0; |
1 | 116 } |
117 if (f == 1) { | |
118 if (strstr(url, "http://")) | |
119 g_snprintf(scan_info, sizeof(scan_info), "http://%%[A-Za-z0-9.]"); | |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
120 else |
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
121 g_snprintf(scan_info, sizeof(scan_info), "%%[A-Za-z0-9.]"); |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
122 f = sscanf(url, scan_info, test->address); |
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
123 g_snprintf(test->page, sizeof(test->page), "%c", '\0'); |
1 | 124 } |
125 | |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
126 sscanf(port, "%d", &test->port); |
1 | 127 return test; |
128 } | |
129 | |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
130 struct grab_url_data { |
4322 | 131 void (* callback)(gpointer, char *, unsigned long); |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
132 gpointer data; |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
133 struct g_url *website; |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
134 char *url; |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
135 gboolean full; |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
136 |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
137 int inpa; |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
138 |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
139 gboolean sentreq; |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
140 gboolean newline; |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
141 gboolean startsaving; |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
142 char *webdata; |
4322 | 143 unsigned long len; |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
144 unsigned long data_len; |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
145 }; |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
146 |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
147 static gboolean |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
148 parse_redirect(const char *data, size_t data_len, gint sock, |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
149 struct grab_url_data *gunk) |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
150 { |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
151 gchar *s; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
152 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
153 if ((s = g_strstr_len(data, data_len, "Location: ")) != NULL) { |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
154 gchar *new_url, *end; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
155 int len; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
156 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
157 s += strlen("Location: "); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
158 end = strchr(s, '\r'); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
159 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
160 /* Just in case :) */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
161 if (end == NULL) |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
162 end = strchr(s, '\n'); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
163 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
164 len = end - s; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
165 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
166 new_url = g_malloc(len + 1); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
167 strncpy(new_url, s, len); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
168 new_url[len] = '\0'; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
169 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
170 /* Close the existing stuff. */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
171 gaim_input_remove(gunk->inpa); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
172 close(sock); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
173 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
174 /* Try again, with this new location. */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
175 grab_url(new_url, gunk->full, gunk->callback, |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
176 gunk->data); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
177 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
178 /* Free up. */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
179 g_free(new_url); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
180 g_free(gunk->webdata); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
181 g_free(gunk->website); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
182 g_free(gunk->url); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
183 g_free(gunk); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
184 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
185 return TRUE; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
186 } |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
187 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
188 return FALSE; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
189 } |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
190 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
191 static size_t |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
192 parse_content_len(const char *data, size_t data_len) |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
193 { |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
194 size_t content_len = 0; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
195 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
196 sscanf(data, "Content-Length: %d", &content_len); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
197 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
198 return content_len; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
199 } |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
200 |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
201 static void grab_url_callback(gpointer dat, gint sock, GaimInputCondition cond) |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
202 { |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
203 struct grab_url_data *gunk = dat; |
1 | 204 char data; |
205 | |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
206 if (sock == -1) { |
4322 | 207 gunk->callback(gunk->data, NULL, 0); |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
208 g_free(gunk->website); |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
209 g_free(gunk->url); |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
210 g_free(gunk); |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
211 return; |
1087
56c7ceb986a8
[gaim-migrate @ 1097]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
691
diff
changeset
|
212 } |
1 | 213 |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
214 if (!gunk->sentreq) { |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
215 char buf[256]; |
3717
988485669631
[gaim-migrate @ 3850]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
3630
diff
changeset
|
216 |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
217 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.0\r\n\r\n", gunk->full ? "" : "/", |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
218 gunk->full ? gunk->url : gunk->website->page); |
5211
0241d6b6702d
[gaim-migrate @ 5581]
Christian Hammond <chipx86@chipx86.com>
parents:
5176
diff
changeset
|
219 |
0241d6b6702d
[gaim-migrate @ 5581]
Christian Hammond <chipx86@chipx86.com>
parents:
5176
diff
changeset
|
220 gaim_debug(GAIM_DEBUG_MISC, "grab_url_callback", |
0241d6b6702d
[gaim-migrate @ 5581]
Christian Hammond <chipx86@chipx86.com>
parents:
5176
diff
changeset
|
221 "Request: %s\n", buf); |
3717
988485669631
[gaim-migrate @ 3850]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
3630
diff
changeset
|
222 |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
223 write(sock, buf, strlen(buf)); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
224 fcntl(sock, F_SETFL, O_NONBLOCK); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
225 gunk->sentreq = TRUE; |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
226 gunk->inpa = gaim_input_add(sock, GAIM_INPUT_READ, grab_url_callback, dat); |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
227 gunk->data_len = 4096; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
228 gunk->webdata = g_malloc(gunk->data_len); |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
229 return; |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
230 } |
1 | 231 |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
232 if (read(sock, &data, 1) > 0 || errno == EWOULDBLOCK) { |
278
29e1669b006b
[gaim-migrate @ 288]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
268
diff
changeset
|
233 if (errno == EWOULDBLOCK) { |
29e1669b006b
[gaim-migrate @ 288]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
268
diff
changeset
|
234 errno = 0; |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
235 return; |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
236 } |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
237 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
238 gunk->len++; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
239 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
240 if (gunk->len == gunk->data_len + 1) { |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
241 gunk->data_len += (gunk->data_len) / 2; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
242 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
243 gunk->webdata = g_realloc(gunk->webdata, gunk->data_len); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
244 } |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
245 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
246 gunk->webdata[gunk->len - 1] = data; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
247 |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
248 if (!gunk->startsaving) { |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
249 if (data == '\r') |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
250 return; |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
251 if (data == '\n') { |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
252 if (gunk->newline) { |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
253 size_t content_len; |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
254 gunk->startsaving = TRUE; |
4331
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
255 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
256 /* See if we can find a redirect. */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
257 if (parse_redirect(gunk->webdata, gunk->len, sock, gunk)) |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
258 return; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
259 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
260 /* No redirect. See if we can find a content length. */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
261 content_len = parse_content_len(gunk->webdata, gunk->len); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
262 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
263 if (content_len == 0) { |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
264 /* We'll stick with an initial 8192 */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
265 content_len = 8192; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
266 } |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
267 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
268 /* Out with the old... */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
269 gunk->len = 0; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
270 g_free(gunk->webdata); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
271 gunk->webdata = NULL; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
272 |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
273 /* In with the new. */ |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
274 gunk->data_len = content_len; |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
275 gunk->webdata = g_malloc(gunk->data_len); |
bbd7b12986a8
[gaim-migrate @ 4595]
Christian Hammond <chipx86@chipx86.com>
parents:
4322
diff
changeset
|
276 } |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
277 else |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
278 gunk->newline = TRUE; |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
279 return; |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
280 } |
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
281 gunk->newline = FALSE; |
278
29e1669b006b
[gaim-migrate @ 288]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
268
diff
changeset
|
282 } |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
283 } else if (errno != ETIMEDOUT) { |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
284 gunk->webdata = g_realloc(gunk->webdata, gunk->len + 1); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
285 gunk->webdata[gunk->len] = 0; |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
286 |
5211
0241d6b6702d
[gaim-migrate @ 5581]
Christian Hammond <chipx86@chipx86.com>
parents:
5176
diff
changeset
|
287 gaim_debug(GAIM_DEBUG_MISC, "grab_url_callback", |
0241d6b6702d
[gaim-migrate @ 5581]
Christian Hammond <chipx86@chipx86.com>
parents:
5176
diff
changeset
|
288 "Received: '%s'\n", gunk->webdata); |
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
289 |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
290 gaim_input_remove(gunk->inpa); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
291 close(sock); |
4322 | 292 gunk->callback(gunk->data, gunk->webdata, gunk->len); |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
293 if (gunk->webdata) |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
294 g_free(gunk->webdata); |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
295 g_free(gunk->website); |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
296 g_free(gunk->url); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
297 g_free(gunk); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
298 } else { |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
299 gaim_input_remove(gunk->inpa); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
300 close(sock); |
4322 | 301 gunk->callback(gunk->data, NULL, 0); |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
302 if (gunk->webdata) |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
303 g_free(gunk->webdata); |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
304 g_free(gunk->website); |
2369
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
305 g_free(gunk->url); |
117e9f0950b6
[gaim-migrate @ 2382]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2090
diff
changeset
|
306 g_free(gunk); |
1 | 307 } |
308 } | |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
309 |
4322 | 310 void grab_url(char *url, gboolean full, void callback(gpointer, char *, unsigned long), gpointer data) |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
311 { |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
312 int sock; |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
313 struct grab_url_data *gunk = g_new0(struct grab_url_data, 1); |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
314 |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
315 gunk->callback = callback; |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
316 gunk->data = data; |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
317 gunk->url = g_strdup(url); |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
318 gunk->website = parse_url(url); |
2584
34812d648f72
[gaim-migrate @ 2597]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2541
diff
changeset
|
319 gunk->full = full; |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
320 |
4634 | 321 if ((sock = proxy_connect(NULL, gunk->website->address, gunk->website->port, |
2372
2927c2c26fe6
[gaim-migrate @ 2385]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2369
diff
changeset
|
322 grab_url_callback, gunk)) < 0) { |
2541
8229710b343b
[gaim-migrate @ 2554]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
2417
diff
changeset
|
323 g_free(gunk->website); |
1881
a02584b98823
[gaim-migrate @ 1891]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1843
diff
changeset
|
324 g_free(gunk->url); |
a02584b98823
[gaim-migrate @ 1891]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1843
diff
changeset
|
325 g_free(gunk); |
4322 | 326 callback(data, g_strdup(_("g003: Error opening connection.\n")), 0); |
1840
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
327 } |
00aef397a1fe
[gaim-migrate @ 1850]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1250
diff
changeset
|
328 } |
5093 | 329 |
/*
 * One entry on the stack of currently open tags kept by html_to_xhtml().
 *
 * Both members only ever point at string literals (they are never
 * allocated or freed individually), hence the const qualification.
 */
struct gaim_parse_tag {
	const char *src_tag;	/* tag name as written in the incoming HTML, e.g. "b" */
	const char *dest_tag;	/* tag name emitted in the XHTML output, e.g. "strong" */
};
334 | |
/*
 * ALLOW_TAG_ALT(x, y) -- pass the HTML tag named x through to the XHTML
 * output under the name y.  x and y must be string literals (they are
 * pasted into larger literals).
 *
 * The macro expands to two `if` statements that end in `continue`, so it
 * can only be used inside the scanning loop of html_to_xhtml(), and it
 * relies on these names from the enclosing scope:
 *     const char *c   -- current read position, pointing at '<'
 *     GString *xhtml  -- XHTML output buffer
 *     GString *plain  -- plain-text output buffer
 *     GList *tags     -- stack of open tags (struct gaim_parse_tag *)
 *
 * First form, "<x " followed by attributes: the attribute text is copied
 * up to the closing '>', with the contents of every quoted value run
 * through g_markup_escape_text().  If another '<' shows up before the
 * '>' (or there is no '>' at all) the tag is considered broken and its
 * '<' is emitted as the entity "&lt;" instead.  Unless the tag is
 * self-closing ("/>"), it is pushed on `tags` so the matching close tag
 * can be translated later.
 *
 * Second form, "<x>" or "<x/>": emitted directly as "<y>" or "<y/>".
 *
 * Backslashes inside quoted values are not treated as escapes; a quoted
 * value always ends at the next matching quote character.
 */
#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
		const char *o = c + strlen("<" x); \
		const char *p = NULL, *q = NULL, *r = NULL; \
		GString *innards = g_string_new(""); \
		while(o && *o) { \
			if(!q && (*o == '\"' || *o == '\'') ) { \
				q = o; \
			} else if(q) { \
				if(*o == *q) { \
					char *unescaped = g_strndup(q+1, o-q-1); \
					char *escaped = g_markup_escape_text(unescaped, -1); \
					g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
					g_free(unescaped); \
					g_free(escaped); \
					q = NULL; \
				} \
			} else if(*o == '<') { \
				r = o; \
			} else if(*o == '>') { \
				p = o; \
				break; \
			} else { \
				innards = g_string_append_c(innards, *o); \
			} \
			o++; \
		} \
		if(p && !r) { \
			if(*(p-1) != '/') { \
				struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \
				pt->src_tag = x; \
				pt->dest_tag = y; \
				tags = g_list_prepend(tags, pt); \
			} \
			xhtml = g_string_append(xhtml, "<" y); \
			xhtml = g_string_append(xhtml, innards->str); \
			xhtml = g_string_append_c(xhtml, '>'); \
			c = p + 1; \
		} else { \
			xhtml = g_string_append(xhtml, "&lt;"); \
			plain = g_string_append_c(plain, '<'); \
			c++; \
		} \
		g_string_free(innards, TRUE); \
		continue; \
	} \
	if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
			(*(c+strlen("<" x)) == '>' || \
			 !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
		xhtml = g_string_append(xhtml, "<" y); \
		c += strlen("<" x); \
		if(*c != '/') { \
			struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \
			pt->src_tag = x; \
			pt->dest_tag = y; \
			tags = g_list_prepend(tags, pt); \
			xhtml = g_string_append_c(xhtml, '>'); \
		} else { \
			xhtml = g_string_append(xhtml, "/>");\
		} \
		c = strchr(c, '>') + 1; \
		continue; \
	}
/* ALLOW_TAG(x) -- pass tag x through under its own name. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
399 | |
5110 | 400 void html_to_xhtml(const char *html, char **xhtml_out, char **plain_out) { |
5093 | 401 GString *xhtml = g_string_new(""); |
5110 | 402 GString *plain = g_string_new(""); |
5093 | 403 GList *tags = NULL, *tag; |
5141 | 404 const char *c = html; |
5176 | 405 |
406 while(c && *c) { | |
5141 | 407 if(*c == '<') { |
5093 | 408 if(*(c+1) == '/') { /* closing tag */ |
409 tag = tags; | |
410 while(tag) { | |
5104 | 411 struct gaim_parse_tag *pt = tag->data; |
412 if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') { | |
413 c += strlen(pt->src_tag) + 3; | |
5093 | 414 break; |
415 } | |
416 tag = tag->next; | |
417 } | |
418 if(tag) { | |
419 while(tags) { | |
5104 | 420 struct gaim_parse_tag *pt = tags->data; |
421 g_string_append_printf(xhtml, "</%s>", pt->dest_tag); | |
5093 | 422 if(tags == tag) |
423 break; | |
5104 | 424 tags = g_list_remove(tags, pt); |
425 g_free(pt); | |
5093 | 426 } |
5104 | 427 g_free(tag->data); |
5093 | 428 tags = g_list_remove(tags, tag->data); |
429 } else { | |
430 /* we tried to close a tag we never opened! escape it | |
431 * and move on */ | |
432 xhtml = g_string_append(xhtml, "<"); | |
5110 | 433 plain = g_string_append_c(plain, '<'); |
5093 | 434 c++; |
435 } | |
436 } else { /* opening tag */ | |
437 ALLOW_TAG("a"); | |
5101 | 438 ALLOW_TAG_ALT("b", "strong"); |
5093 | 439 ALLOW_TAG("blockquote"); |
5101 | 440 ALLOW_TAG_ALT("bold", "strong"); |
5093 | 441 ALLOW_TAG("cite"); |
442 ALLOW_TAG("div"); | |
443 ALLOW_TAG("em"); | |
444 ALLOW_TAG("h1"); | |
445 ALLOW_TAG("h2"); | |
446 ALLOW_TAG("h3"); | |
447 ALLOW_TAG("h4"); | |
448 ALLOW_TAG("h5"); | |
449 ALLOW_TAG("h6"); | |
450 ALLOW_TAG("html"); | |
5101 | 451 ALLOW_TAG_ALT("i", "em"); |
452 ALLOW_TAG_ALT("italic", "em"); | |
5093 | 453 ALLOW_TAG("li"); |
454 ALLOW_TAG("ol"); | |
455 ALLOW_TAG("p"); | |
456 ALLOW_TAG("pre"); | |
457 ALLOW_TAG("q"); | |
458 ALLOW_TAG("span"); | |
459 ALLOW_TAG("strong"); | |
460 ALLOW_TAG("ul"); | |
461 | |
5174 | 462 /* we skip <HR> because it's not legal in XHTML-IM. However, |
463 * we still want to send something sensible, so we put a | |
464 * linebreak in its place. <BR> also needs special handling | |
465 * because putting a </BR> to close it would just be dumb. */ | |
466 if((!g_ascii_strncasecmp(c, "<br", 3) | |
467 || !g_ascii_strncasecmp(c, "<hr", 3)) | |
468 && (*(c+3) == '>' || | |
469 !g_ascii_strncasecmp(c+3, "/>", 2) || | |
470 !g_ascii_strncasecmp(c+3, " />", 3))) { | |
471 c = strchr(c, '>') + 1; | |
472 xhtml = g_string_append(xhtml, "<br/>"); | |
473 if(*c != '\n') | |
474 plain = g_string_append_c(plain, '\n'); | |
475 continue; | |
476 } | |
477 if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) { | |
5104 | 478 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); |
479 pt->src_tag = *(c+2) == '>' ? "u" : "underline"; | |
480 pt->dest_tag = "span"; | |
481 tags = g_list_prepend(tags, pt); | |
482 c = strchr(c, '>') + 1; | |
483 xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>"); | |
484 continue; | |
485 } | |
5174 | 486 if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) { |
5104 | 487 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); |
488 pt->src_tag = *(c+2) == '>' ? "s" : "strike"; | |
489 pt->dest_tag = "span"; | |
490 tags = g_list_prepend(tags, pt); | |
491 c = strchr(c, '>') + 1; | |
492 xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>"); | |
493 continue; | |
494 } | |
495 if(!g_ascii_strncasecmp(c, "<sub>", 5)) { | |
496 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
497 pt->src_tag = "sub"; | |
498 pt->dest_tag = "span"; | |
499 tags = g_list_prepend(tags, pt); | |
500 c = strchr(c, '>') + 1; | |
501 xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>"); | |
502 continue; | |
503 } | |
504 if(!g_ascii_strncasecmp(c, "<sup>", 5)) { | |
505 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
506 pt->src_tag = "sup"; | |
507 pt->dest_tag = "span"; | |
508 tags = g_list_prepend(tags, pt); | |
509 c = strchr(c, '>') + 1; | |
510 xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>"); | |
511 continue; | |
512 } | |
5107 | 513 if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) { |
514 const char *p = c; | |
515 GString *style = g_string_new(""); | |
516 struct gaim_parse_tag *pt; | |
517 while(*p && *p != '>') { | |
518 if(!g_ascii_strncasecmp(p, "color=", strlen("color="))) { | |
519 const char *q = p + strlen("color="); | |
520 GString *color = g_string_new(""); | |
521 if(*q == '\'' || *q == '\"') | |
522 q++; | |
523 while(*q && *q != '\"' && *q != '\'' && *q != ' ') { | |
524 color = g_string_append_c(color, *q); | |
525 q++; | |
526 } | |
527 g_string_append_printf(style, "color: %s; ", color->str); | |
528 g_string_free(color, TRUE); | |
529 p = q; | |
530 } else if(!g_ascii_strncasecmp(p, "face=", strlen("face="))) { | |
531 const char *q = p + strlen("face="); | |
532 gboolean space_allowed = FALSE; | |
533 GString *face = g_string_new(""); | |
534 if(*q == '\'' || *q == '\"') { | |
535 space_allowed = TRUE; | |
536 q++; | |
537 } | |
538 while(*q && *q != '\"' && *q != '\'' && (space_allowed || *q != ' ')) { | |
539 face = g_string_append_c(face, *q); | |
540 q++; | |
541 } | |
542 g_string_append_printf(style, "font-family: %s; ", face->str); | |
543 g_string_free(face, TRUE); | |
544 p = q; | |
545 } else if(!g_ascii_strncasecmp(p, "size=", strlen("size="))) { | |
546 const char *q = p + strlen("size="); | |
547 int sz; | |
548 const char *size = "medium"; | |
549 if(*q == '\'' || *q == '\"') | |
550 q++; | |
551 sz = atoi(q); | |
552 if(sz < 3) | |
553 size = "smaller"; | |
554 else if(sz > 3) | |
555 size = "larger"; | |
556 g_string_append_printf(style, "font-size: %s; ", size); | |
557 p = q; | |
558 } | |
559 p++; | |
560 } | |
561 c = strchr(c, '>') + 1; | |
562 pt = g_new0(struct gaim_parse_tag, 1); | |
563 pt->src_tag = "font"; | |
564 pt->dest_tag = "span"; | |
565 tags = g_list_prepend(tags, pt); | |
566 xhtml = g_string_append(xhtml, "<span"); | |
567 if(style->len) | |
568 g_string_append_printf(xhtml, " style='%s'", style->str); | |
569 xhtml = g_string_append_c(xhtml, '>'); | |
570 g_string_free(style, TRUE); | |
571 continue; | |
572 } | |
573 if(!g_ascii_strncasecmp(c, "<body ", 6)) { | |
574 const char *p = c; | |
575 gboolean did_something = FALSE; | |
576 while(*p && *p != '>') { | |
577 if(!g_ascii_strncasecmp(p, "bgcolor=", strlen("bgcolor="))) { | |
578 const char *q = p + strlen("bgcolor="); | |
579 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
580 GString *color = g_string_new(""); | |
581 if(*q == '\'' || *q == '\"') | |
582 q++; | |
583 while(*q && *q != '\"' && *q != '\'' && *q != ' ') { | |
584 color = g_string_append_c(color, *q); | |
585 q++; | |
586 } | |
587 g_string_append_printf(xhtml, "<span style='background: %s;'>", color->str); | |
588 g_string_free(color, TRUE); | |
589 c = strchr(c, '>') + 1; | |
590 pt->src_tag = "body"; | |
591 pt->dest_tag = "span"; | |
592 tags = g_list_prepend(tags, pt); | |
593 did_something = TRUE; | |
594 break; | |
595 } | |
596 p++; | |
597 } | |
598 if(did_something) continue; | |
599 } | |
600 /* this has to come after the special case for bgcolor */ | |
601 ALLOW_TAG("body"); | |
5093 | 602 if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) { |
603 char *p = strstr(c + strlen("<!--"), "-->"); | |
604 if(p) { | |
605 xhtml = g_string_append(xhtml, "<!--"); | |
606 c += strlen("<!--"); | |
607 continue; | |
608 } | |
609 } | |
610 | |
611 xhtml = g_string_append(xhtml, "<"); | |
5110 | 612 plain = g_string_append_c(plain, '<'); |
5093 | 613 c++; |
614 } | |
615 } else { | |
616 xhtml = g_string_append_c(xhtml, *c); | |
5110 | 617 plain = g_string_append_c(plain, *c); |
5093 | 618 c++; |
619 } | |
620 } | |
621 tag = tags; | |
622 while(tag) { | |
623 g_string_append_printf(xhtml, "</%s>", (char *)tag->data); | |
624 tag = tag->next; | |
625 } | |
626 g_list_free(tags); | |
5110 | 627 if(xhtml_out) |
628 *xhtml_out = g_strdup(xhtml->str); | |
629 if(plain_out) | |
630 *plain_out = g_strdup(plain->str); | |
5093 | 631 g_string_free(xhtml, TRUE); |
5110 | 632 g_string_free(plain, TRUE); |
5093 | 633 } |