comparison src/util.c @ 7095:c8bf2da398e3

[gaim-migrate @ 7660] html.[ch] is gone. Everything inside was namespaced and put in util.[ch]. One less ugly part of gaim in the tree. committer: Tailor Script <tailor@pidgin.im>
author Christian Hammond <chipx86@chipx86.com>
date Wed, 01 Oct 2003 03:43:18 +0000
parents 2343c3aa1dec
children 8130adad8b7a
comparison
equal deleted inserted replaced
7094:2343c3aa1dec 7095:c8bf2da398e3
1 /* 1 /*
2 * @file util.h Utility Functions 2 * @file util.h Utility Functions
3 * @ingroup core 3 * @ingroup core
4 * 4 *
5 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net> 5 * Copyright (C) 1998-1999 Mark Spencer <markster@marko.net>
6 * Copyright (C) 2003 Christian Hammond <chipx86@gnupdate.org> 6 * 2003 Christian Hammond <chipx86@gnupdate.org>
7 * 2003 Nathan Walp <faceprint@faceprint.com>
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or 11 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version. 12 * (at your option) any later version.
1167 1168
1168 return found; 1169 return found;
1169 } 1170 }
1170 1171
1171 gboolean 1172 gboolean
1173 gaim_markup_extract_info_field(const char *str, char *dest_buffer,
1174 const char *start_token, int skip,
1175 const char *end_token, char check_value,
1176 const char *no_value_token,
1177 const char *display_name, gboolean is_link,
1178 const char *link_prefix)
1179 {
1180 const char *p, *q;
1181 char buf[1024];
1182
1183 g_return_val_if_fail(str != NULL, FALSE);
1184 g_return_val_if_fail(dest_buffer != NULL, FALSE);
1185 g_return_val_if_fail(start_token != NULL, FALSE);
1186 g_return_val_if_fail(end_token != NULL, FALSE);
1187 g_return_val_if_fail(display_name != NULL, FALSE);
1188
1189 p = strstr(str, start_token);
1190
1191 if (p == NULL)
1192 return FALSE;
1193
1194 p += strlen(start_token) + skip;
1195
1196 if (check_value != '\0' && *p == check_value)
1197 return FALSE;
1198
1199 q = strstr(p, end_token);
1200
1201 if (q != NULL && (!no_value_token ||
1202 (no_value_token && strncmp(p, no_value_token,
1203 strlen(no_value_token)))))
1204 {
1205 strcat(dest_buffer, "<b>");
1206 strcat(dest_buffer, display_name);
1207 strcat(dest_buffer, ":</b> ");
1208
1209 if (is_link)
1210 {
1211 strcat(dest_buffer, "<br><a href=\"");
1212 memcpy(buf, p, q - p);
1213 buf[q - p] = '\0';
1214
1215 if (link_prefix)
1216 strcat(dest_buffer, link_prefix);
1217
1218 strcat(dest_buffer, buf);
1219 strcat(dest_buffer, "\">");
1220
1221 if (link_prefix)
1222 strcat(dest_buffer, link_prefix);
1223
1224 strcat(dest_buffer, buf);
1225 strcat(dest_buffer, "</a>");
1226 }
1227 else
1228 {
1229 memcpy(buf, p, q - p);
1230 buf[q - p] = '\0';
1231 strcat(dest_buffer, buf);
1232 }
1233
1234 strcat(dest_buffer, "<br>\n");
1235
1236 return TRUE;
1237 }
1238
1239 return FALSE;
1240 }
1241
/**
 * One entry on the stack of currently open tags kept while converting
 * HTML to XHTML.
 */
struct gaim_parse_tag {
	char *src_tag;  /**< Tag name as it appears in the input HTML.  */
	char *dest_tag; /**< Tag name written to the XHTML output.      */
};
1246
/*
 * ALLOW_TAG_ALT(x, y): if the input at `c` is an opening tag named x
 * (case-insensitive), copy it to the output as tag y and `continue` the
 * enclosing loop.
 *
 * Must be used inside a loop, in a scope providing:
 *   const char *c  - read cursor, pointing at the '<' being examined
 *   GString *xhtml - XHTML output
 *   GString *plain - plain-text output
 *   GList *tags    - stack of open tags (struct gaim_parse_tag *)
 * x and y must be string literals.
 *
 * Two forms are recognised:
 *   "<x ...>" - tag with attributes. Quoted attribute values are passed
 *               through g_markup_escape_text(); everything else up to the
 *               closing '>' is copied as is. If no '>' is found, or an
 *               unquoted '<' appears first, the tag is rejected: the '<'
 *               is escaped as "&lt;" and the cursor advances one char.
 *   "<x>" or "<x/>" - bare tag.
 * Unless the tag is self-closing ("/>"), an entry is pushed on `tags` so
 * that the matching close tag can be emitted later.
 *
 * The expansion is a single if / else-if / else statement, so the macro
 * behaves as one statement when guarded, e.g. `if (cond) ALLOW_TAG("x");`.
 */
#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
		const char *o = c + strlen("<" x); \
		const char *p = NULL, *q = NULL, *r = NULL; \
		GString *innards = g_string_new(""); \
		while(*o) { \
			if(!q && (*o == '\"' || *o == '\'') ) { \
				q = o; /* opening quote */ \
			} else if(q) { \
				if(*o == *q) { /* matching closing quote */ \
					char *unescaped = g_strndup(q+1, o-q-1); \
					char *escaped = g_markup_escape_text(unescaped, -1); \
					g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
					g_free(unescaped); \
					g_free(escaped); \
					q = NULL; \
				} \
			} else if(*o == '<') { \
				r = o; \
			} else if(*o == '>') { \
				p = o; \
				break; \
			} else { \
				innards = g_string_append_c(innards, *o); \
			} \
			o++; \
		} \
		if(p && !r) { \
			if(*(p-1) != '/') { \
				struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \
				pt->src_tag = x; \
				pt->dest_tag = y; \
				tags = g_list_prepend(tags, pt); \
			} \
			xhtml = g_string_append(xhtml, "<" y); \
			xhtml = g_string_append(xhtml, innards->str); \
			xhtml = g_string_append_c(xhtml, '>'); \
			c = p + 1; \
		} else { \
			xhtml = g_string_append(xhtml, "&lt;"); \
			plain = g_string_append_c(plain, '<'); \
			c++; \
		} \
		g_string_free(innards, TRUE); \
		continue; \
	} else if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
			(*(c+strlen("<" x)) == '>' || \
			 !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
		xhtml = g_string_append(xhtml, "<" y); \
		c += strlen("<" x); \
		if(*c != '/') { \
			struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \
			pt->src_tag = x; \
			pt->dest_tag = y; \
			tags = g_list_prepend(tags, pt); \
			xhtml = g_string_append_c(xhtml, '>'); \
		} else { \
			xhtml = g_string_append(xhtml, "/>"); \
		} \
		c = strchr(c, '>') + 1; /* '>' is guaranteed by the test above */ \
		continue; \
	} else (void)0

/* ALLOW_TAG(x): pass tag x through under the same name. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
1313 void
1314 gaim_markup_html_to_xhtml(const char *html, char **xhtml_out,
1315 char **plain_out)
1316 {
1317 GString *xhtml = g_string_new("");
1318 GString *plain = g_string_new("");
1319 GList *tags = NULL, *tag;
1320 const char *c = html;
1321
1322 while(c && *c) {
1323 if(*c == '<') {
1324 if(*(c+1) == '/') { /* closing tag */
1325 tag = tags;
1326 while(tag) {
1327 struct gaim_parse_tag *pt = tag->data;
1328 if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
1329 c += strlen(pt->src_tag) + 3;
1330 break;
1331 }
1332 tag = tag->next;
1333 }
1334 if(tag) {
1335 while(tags) {
1336 struct gaim_parse_tag *pt = tags->data;
1337 g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
1338 if(tags == tag)
1339 break;
1340 tags = g_list_remove(tags, pt);
1341 g_free(pt);
1342 }
1343 g_free(tag->data);
1344 tags = g_list_remove(tags, tag->data);
1345 } else {
1346 /* we tried to close a tag we never opened! escape it
1347 * and move on */
1348 xhtml = g_string_append(xhtml, "&lt;");
1349 plain = g_string_append_c(plain, '<');
1350 c++;
1351 }
1352 } else { /* opening tag */
1353 ALLOW_TAG("a");
1354 ALLOW_TAG_ALT("b", "strong");
1355 ALLOW_TAG("blockquote");
1356 ALLOW_TAG_ALT("bold", "strong");
1357 ALLOW_TAG("cite");
1358 ALLOW_TAG("div");
1359 ALLOW_TAG("em");
1360 ALLOW_TAG("h1");
1361 ALLOW_TAG("h2");
1362 ALLOW_TAG("h3");
1363 ALLOW_TAG("h4");
1364 ALLOW_TAG("h5");
1365 ALLOW_TAG("h6");
1366 /* we only allow html to start the message */
1367 if(c == html)
1368 ALLOW_TAG("html");
1369 ALLOW_TAG_ALT("i", "em");
1370 ALLOW_TAG_ALT("italic", "em");
1371 ALLOW_TAG("li");
1372 ALLOW_TAG("ol");
1373 ALLOW_TAG("p");
1374 ALLOW_TAG("pre");
1375 ALLOW_TAG("q");
1376 ALLOW_TAG("span");
1377 ALLOW_TAG("strong");
1378 ALLOW_TAG("ul");
1379
1380 /* we skip <HR> because it's not legal in XHTML-IM. However,
1381 * we still want to send something sensible, so we put a
1382 * linebreak in its place. <BR> also needs special handling
1383 * because putting a </BR> to close it would just be dumb. */
1384 if((!g_ascii_strncasecmp(c, "<br", 3)
1385 || !g_ascii_strncasecmp(c, "<hr", 3))
1386 && (*(c+3) == '>' ||
1387 !g_ascii_strncasecmp(c+3, "/>", 2) ||
1388 !g_ascii_strncasecmp(c+3, " />", 3))) {
1389 c = strchr(c, '>') + 1;
1390 xhtml = g_string_append(xhtml, "<br/>");
1391 if(*c != '\n')
1392 plain = g_string_append_c(plain, '\n');
1393 continue;
1394 }
1395 if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
1396 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1);
1397 pt->src_tag = *(c+2) == '>' ? "u" : "underline";
1398 pt->dest_tag = "span";
1399 tags = g_list_prepend(tags, pt);
1400 c = strchr(c, '>') + 1;
1401 xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
1402 continue;
1403 }
1404 if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
1405 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1);
1406 pt->src_tag = *(c+2) == '>' ? "s" : "strike";
1407 pt->dest_tag = "span";
1408 tags = g_list_prepend(tags, pt);
1409 c = strchr(c, '>') + 1;
1410 xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
1411 continue;
1412 }
1413 if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
1414 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1);
1415 pt->src_tag = "sub";
1416 pt->dest_tag = "span";
1417 tags = g_list_prepend(tags, pt);
1418 c = strchr(c, '>') + 1;
1419 xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
1420 continue;
1421 }
1422 if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
1423 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1);
1424 pt->src_tag = "sup";
1425 pt->dest_tag = "span";
1426 tags = g_list_prepend(tags, pt);
1427 c = strchr(c, '>') + 1;
1428 xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
1429 continue;
1430 }
1431 if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
1432 const char *p = c;
1433 GString *style = g_string_new("");
1434 struct gaim_parse_tag *pt;
1435 while(*p && *p != '>') {
1436 if(!g_ascii_strncasecmp(p, "color=", strlen("color="))) {
1437 const char *q = p + strlen("color=");
1438 GString *color = g_string_new("");
1439 if(*q == '\'' || *q == '\"')
1440 q++;
1441 while(*q && *q != '\"' && *q != '\'' && *q != ' ') {
1442 color = g_string_append_c(color, *q);
1443 q++;
1444 }
1445 g_string_append_printf(style, "color: %s; ", color->str);
1446 g_string_free(color, TRUE);
1447 p = q;
1448 } else if(!g_ascii_strncasecmp(p, "face=", strlen("face="))) {
1449 const char *q = p + strlen("face=");
1450 gboolean space_allowed = FALSE;
1451 GString *face = g_string_new("");
1452 if(*q == '\'' || *q == '\"') {
1453 space_allowed = TRUE;
1454 q++;
1455 }
1456 while(*q && *q != '\"' && *q != '\'' && (space_allowed || *q != ' ')) {
1457 face = g_string_append_c(face, *q);
1458 q++;
1459 }
1460 g_string_append_printf(style, "font-family: %s; ", face->str);
1461 g_string_free(face, TRUE);
1462 p = q;
1463 } else if(!g_ascii_strncasecmp(p, "size=", strlen("size="))) {
1464 const char *q = p + strlen("size=");
1465 int sz;
1466 const char *size = "medium";
1467 if(*q == '\'' || *q == '\"')
1468 q++;
1469 sz = atoi(q);
1470 if(sz < 3)
1471 size = "smaller";
1472 else if(sz > 3)
1473 size = "larger";
1474 g_string_append_printf(style, "font-size: %s; ", size);
1475 p = q;
1476 }
1477 p++;
1478 }
1479 c = strchr(c, '>') + 1;
1480 pt = g_new0(struct gaim_parse_tag, 1);
1481 pt->src_tag = "font";
1482 pt->dest_tag = "span";
1483 tags = g_list_prepend(tags, pt);
1484 xhtml = g_string_append(xhtml, "<span");
1485 if(style->len)
1486 g_string_append_printf(xhtml, " style='%s'", style->str);
1487 xhtml = g_string_append_c(xhtml, '>');
1488 g_string_free(style, TRUE);
1489 continue;
1490 }
1491 if(!g_ascii_strncasecmp(c, "<body ", 6)) {
1492 const char *p = c;
1493 gboolean did_something = FALSE;
1494 while(*p && *p != '>') {
1495 if(!g_ascii_strncasecmp(p, "bgcolor=", strlen("bgcolor="))) {
1496 const char *q = p + strlen("bgcolor=");
1497 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1);
1498 GString *color = g_string_new("");
1499 if(*q == '\'' || *q == '\"')
1500 q++;
1501 while(*q && *q != '\"' && *q != '\'' && *q != ' ') {
1502 color = g_string_append_c(color, *q);
1503 q++;
1504 }
1505 g_string_append_printf(xhtml, "<span style='background: %s;'>", color->str);
1506 g_string_free(color, TRUE);
1507 c = strchr(c, '>') + 1;
1508 pt->src_tag = "body";
1509 pt->dest_tag = "span";
1510 tags = g_list_prepend(tags, pt);
1511 did_something = TRUE;
1512 break;
1513 }
1514 p++;
1515 }
1516 if(did_something) continue;
1517 }
1518 /* this has to come after the special case for bgcolor */
1519 ALLOW_TAG("body");
1520 if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
1521 char *p = strstr(c + strlen("<!--"), "-->");
1522 if(p) {
1523 xhtml = g_string_append(xhtml, "<!--");
1524 c += strlen("<!--");
1525 continue;
1526 }
1527 }
1528
1529 xhtml = g_string_append(xhtml, "&lt;");
1530 plain = g_string_append_c(plain, '<');
1531 c++;
1532 }
1533 } else {
1534 xhtml = g_string_append_c(xhtml, *c);
1535 plain = g_string_append_c(plain, *c);
1536 c++;
1537 }
1538 }
1539 tag = tags;
1540 while(tag) {
1541 g_string_append_printf(xhtml, "</%s>", (char *)tag->data);
1542 tag = tag->next;
1543 }
1544 g_list_free(tags);
1545 if(xhtml_out)
1546 *xhtml_out = g_strdup(xhtml->str);
1547 if(plain_out)
1548 *plain_out = g_strdup(plain->str);
1549 g_string_free(xhtml, TRUE);
1550 g_string_free(plain, TRUE);
1551 }
1552
1553 char *
1554 gaim_markup_strip_html(const char *str)
1555 {
1556 int i, j, k;
1557 gboolean visible = TRUE;
1558 gchar *str2;
1559
1560 g_return_val_if_fail(str != NULL, NULL);
1561
1562 str2 = g_strdup(str);
1563
1564 for (i = 0, j = 0; str2[i]; i++)
1565 {
1566 if (str2[i] == '<')
1567 {
1568 k = i + 1;
1569
1570 if(g_ascii_isspace(str2[k]))
1571 visible = TRUE;
1572 else
1573 {
1574 while (str2[k])
1575 {
1576 if (str2[k] == '<')
1577 {
1578 visible = TRUE;
1579 break;
1580 }
1581
1582 if (str2[k] == '>')
1583 {
1584 visible = FALSE;
1585 break;
1586 }
1587
1588 k++;
1589 }
1590 }
1591 }
1592 else if (str2[i] == '>' && !visible)
1593 {
1594 visible = TRUE;
1595 continue;
1596 }
1597
1598 if (str2[i] == '&' && strncasecmp(str2 + i, "&quot;", 6) == 0)
1599 {
1600 str2[j++] = '\"';
1601 i = i + 5;
1602 continue;
1603 }
1604
1605 if (visible)
1606 str2[j++] = str2[i];
1607 }
1608
1609 str2[j] = '\0';
1610
1611 return str2;
1612 }
1613
1614 gboolean
1172 gaim_url_parse(const char *url, char **ret_host, int *ret_port, 1615 gaim_url_parse(const char *url, char **ret_host, int *ret_port,
1173 char **ret_path) 1616 char **ret_path)
1174 { 1617 {
1175 char scan_info[255]; 1618 char scan_info[255];
1176 char port_str[5]; 1619 char port_str[5];