Mercurial > pidgin.yaz
comparison src/util.c @ 9222:316b1afb5974
[gaim-migrate @ 10018]
nosnilmot make autolinkification work even better.
This patch improves the way we auto-linkify detected
URLs and email addresses:
- if a URL is enclosed in parentheses, e.g.
(http://www.google.com/), the closing parenthesis will
not be included as part of the link
- email addresses inside parentheses or <>'s will
be linked correctly
- either whitespace or non-ascii characters are
recognized as boundaries for email addresses, which
will allow auto linking of email addresses for Chinese
conversations which don't use whitespace:
<stu|laptop> wing: you know you mentioned being able to
auto-linkify email addresses in Chinese text (without
whitespace), do you want to try
a patch out for me?
<wing> stu|laptop: eh, actually i found that gaim cannot
<stu|laptop> cannot? at all?
<wing> stu|laptop: it seems so
<stu|laptop> even with this? <link to patch>
<wing> stu|laptop: i'll check it out then :)
... ...
<wing> stu|laptop: it works perfectly with the patch :)
committer: Tailor Script <tailor@pidgin.im>
author | Tim Ringenbach <marv@pidgin.im> |
---|---|
date | Sun, 06 Jun 2004 19:06:22 +0000 |
parents | f0488214826f |
children | 9171e528d7e5 |
comparison
equal
deleted
inserted
replaced
9221:8054855f2bb9 | 9222:316b1afb5974 |
---|---|
1354 gaim_markup_linkify(const char *text) | 1354 gaim_markup_linkify(const char *text) |
1355 { | 1355 { |
1356 const char *c, *t, *q = NULL; | 1356 const char *c, *t, *q = NULL; |
1357 char *tmp, *tmpurlbuf; | 1357 char *tmp, *tmpurlbuf; |
1358 char url_buf[BUF_LEN * 4]; | 1358 char url_buf[BUF_LEN * 4]; |
1359 gunichar g; | |
1359 gboolean inside_html = FALSE; | 1360 gboolean inside_html = FALSE; |
1361 int inside_paren = 0; | |
1360 GString *ret = g_string_new(""); | 1362 GString *ret = g_string_new(""); |
1361 /* Assumes you have a buffer able to cary at least BUF_LEN * 2 bytes */ | 1363 /* Assumes you have a buffer able to cary at least BUF_LEN * 2 bytes */ |
1362 | 1364 |
1363 c = text; | 1365 c = text; |
1364 while (*c) { | 1366 while (*c) { |
1367 | |
1368 if(*c == '(' && !inside_html) { | |
1369 inside_paren++; | |
1370 ret = g_string_append_c(ret, *c); | |
1371 c++; | |
1372 } | |
1373 | |
1365 if(inside_html) { | 1374 if(inside_html) { |
1366 if(*c == '>') { | 1375 if(*c == '>') { |
1367 inside_html = FALSE; | 1376 inside_html = FALSE; |
1368 } else if(!q && (*c == '\"' || *c == '\'')) { | 1377 } else if(!q && (*c == '\"' || *c == '\'')) { |
1369 q = c; | 1378 q = c; |
1396 continue; | 1405 continue; |
1397 } | 1406 } |
1398 | 1407 |
1399 if (*(t - 1) == '.') | 1408 if (*(t - 1) == '.') |
1400 t--; | 1409 t--; |
1410 if ((*(t - 1) == ')' && (inside_paren > 0))) { | |
1411 t--; | |
1412 } | |
1401 strncpy(url_buf, c, t - c); | 1413 strncpy(url_buf, c, t - c); |
1402 url_buf[t - c] = 0; | 1414 url_buf[t - c] = 0; |
1403 tmpurlbuf = gaim_unescape_html(url_buf); | 1415 tmpurlbuf = gaim_unescape_html(url_buf); |
1404 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", | 1416 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", |
1405 tmpurlbuf, url_buf); | 1417 tmpurlbuf, url_buf); |
1426 continue; | 1438 continue; |
1427 } | 1439 } |
1428 | 1440 |
1429 if (*(t - 1) == '.') | 1441 if (*(t - 1) == '.') |
1430 t--; | 1442 t--; |
1443 if ((*(t - 1) == ')' && (inside_paren > 0))) { | |
1444 t--; | |
1445 } | |
1431 strncpy(url_buf, c, t - c); | 1446 strncpy(url_buf, c, t - c); |
1432 url_buf[t - c] = 0; | 1447 url_buf[t - c] = 0; |
1433 tmpurlbuf = gaim_unescape_html(url_buf); | 1448 tmpurlbuf = gaim_unescape_html(url_buf); |
1434 g_string_append_printf(ret, | 1449 g_string_append_printf(ret, |
1435 "<A HREF=\"http://%s\">%s</A>", tmpurlbuf, | 1450 "<A HREF=\"http://%s\">%s</A>", tmpurlbuf, |
1447 t = c; | 1462 t = c; |
1448 while (1) { | 1463 while (1) { |
1449 if (badchar(*t)) { | 1464 if (badchar(*t)) { |
1450 if (*(t - 1) == '.') | 1465 if (*(t - 1) == '.') |
1451 t--; | 1466 t--; |
1467 if ((*(t - 1) == ')' && (inside_paren > 0))) { | |
1468 t--; | |
1469 } | |
1452 strncpy(url_buf, c, t - c); | 1470 strncpy(url_buf, c, t - c); |
1453 url_buf[t - c] = 0; | 1471 url_buf[t - c] = 0; |
1454 tmpurlbuf = gaim_unescape_html(url_buf); | 1472 tmpurlbuf = gaim_unescape_html(url_buf); |
1455 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", | 1473 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", |
1456 tmpurlbuf, url_buf); | 1474 tmpurlbuf, url_buf); |
1471 if (t - c == 4) { | 1489 if (t - c == 4) { |
1472 break; | 1490 break; |
1473 } | 1491 } |
1474 if (*(t - 1) == '.') | 1492 if (*(t - 1) == '.') |
1475 t--; | 1493 t--; |
1494 if ((*(t - 1) == ')' && (inside_paren > 0))) { | |
1495 t--; | |
1496 } | |
1476 strncpy(url_buf, c, t - c); | 1497 strncpy(url_buf, c, t - c); |
1477 url_buf[t - c] = 0; | 1498 url_buf[t - c] = 0; |
1478 tmpurlbuf = gaim_unescape_html(url_buf); | 1499 tmpurlbuf = gaim_unescape_html(url_buf); |
1479 g_string_append_printf(ret, | 1500 g_string_append_printf(ret, |
1480 "<A HREF=\"ftp://%s\">%s</A>", tmpurlbuf, | 1501 "<A HREF=\"ftp://%s\">%s</A>", tmpurlbuf, |
1507 break; | 1528 break; |
1508 t++; | 1529 t++; |
1509 | 1530 |
1510 } | 1531 } |
1511 } else if (c != text && (*c == '@')) { | 1532 } else if (c != text && (*c == '@')) { |
1512 char *tmp; | |
1513 int flag; | 1533 int flag; |
1514 int len = 0; | 1534 GString *gurl_buf; |
1515 const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0"; | 1535 const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0"; |
1516 url_buf[0] = 0; | 1536 url_buf[0] = 0; |
1517 | 1537 |
1518 if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1))) | 1538 if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1))) |
1519 flag = 0; | 1539 flag = 0; |
1520 else | 1540 else |
1521 flag = 1; | 1541 flag = 1; |
1522 | 1542 |
1523 t = c; | 1543 t = c; |
1544 gurl_buf = g_string_new(""); | |
1524 while (flag) { | 1545 while (flag) { |
1525 if (badchar(*t)) { | 1546 /* iterate backwards grabbing the local part of an email address */ |
1526 ret = g_string_truncate(ret, ret->len - (len - 1)); | 1547 g = g_utf8_get_char(t); |
1548 if (badchar(*t) || (g >= 127) || (*t == '(') || | |
1549 ((*t == ';') && (t > (text+2)) && !g_ascii_strncasecmp(t - 3, "&lt;", 4))) { | |
1550 /* local part will already be part of ret, strip it out */ | |
1551 ret = g_string_truncate(ret, ret->len - (c - t)); | |
1552 ret = g_string_append_unichar(ret, g); | |
1527 break; | 1553 break; |
1528 } else { | 1554 } else { |
1529 len++; | 1555 g_string_prepend_unichar(gurl_buf, g); |
1530 tmp = g_malloc(len + 1); | 1556 t = g_utf8_find_prev_char(text, t); |
1531 tmp[len] = 0; | |
1532 tmp[0] = *t; | |
1533 strncpy(tmp + 1, url_buf, len - 1); | |
1534 strcpy(url_buf, tmp); | |
1535 url_buf[len] = 0; | |
1536 g_free(tmp); | |
1537 t--; | |
1538 if (t < text) { | 1557 if (t < text) { |
1539 ret = g_string_assign(ret, ""); | 1558 ret = g_string_assign(ret, ""); |
1540 break; | 1559 break; |
1541 } | 1560 } |
1542 } | 1561 } |
1543 } | 1562 } |
1544 | 1563 |
1545 t = c + 1; | 1564 t = g_utf8_find_next_char(c, NULL); |
1546 | 1565 |
1547 while (flag) { | 1566 while (flag) { |
1548 if (badchar(*t)) { | 1567 /* iterate forwards grabbing the domain part of an email address */ |
1568 g = g_utf8_get_char(t); | |
1569 if (badchar(*t) || (g >= 127) || (*t == ')') || | |
1570 ((*t == '&') && !g_ascii_strncasecmp(t, "&gt;", 4))) { | |
1549 char *d; | 1571 char *d; |
1550 | 1572 |
1573 strcpy(url_buf, gurl_buf->str); | |
1574 | |
1575 /* strip off trailing periods */ | |
1551 for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--) | 1576 for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--) |
1552 *d = '\0'; | 1577 *d = '\0'; |
1553 | 1578 |
1554 tmpurlbuf = gaim_unescape_html(url_buf); | 1579 tmpurlbuf = gaim_unescape_html(url_buf); |
1555 if (gaim_email_is_valid(tmpurlbuf)) { | 1580 if (gaim_email_is_valid(tmpurlbuf)) { |
1561 g_free(tmpurlbuf); | 1586 g_free(tmpurlbuf); |
1562 c = t; | 1587 c = t; |
1563 | 1588 |
1564 break; | 1589 break; |
1565 } else { | 1590 } else { |
1566 strncat(url_buf, t, 1); | 1591 g_string_append_unichar(gurl_buf, g); |
1567 len++; | 1592 t = g_utf8_find_next_char(t, NULL); |
1568 url_buf[len] = 0; | 1593 } |
1569 } | 1594 } |
1570 | 1595 g_string_free(gurl_buf, TRUE); |
1571 t++; | 1596 } |
1572 } | 1597 |
1598 if(*c == ')' && !inside_html) { | |
1599 inside_paren--; | |
1600 ret = g_string_append_c(ret, *c); | |
1601 c++; | |
1573 } | 1602 } |
1574 | 1603 |
1575 if (*c == 0) | 1604 if (*c == 0) |
1576 break; | 1605 break; |
1577 | 1606 |