comparison src/util.c @ 9222:316b1afb5974

[gaim-migrate @ 10018] nosnilmot make autolinkification work even better. This patch improves the way we auto-linkify detected URLs and email addresses: - if a URL is enclosed in parentheses, e.g. (http://www.google.com/), the closing parenthesis will not be included as part of the link - email addresses inside parentheses or <>'s will correctly be linked - either whitespace or non-ascii characters are recognized as boundaries for email addresses, which will allow auto linking of email addresses for Chinese conversations which don't use whitespace: <stu|laptop> wing: you know you mentioned being able to auto-linkify email addresses in Chinese text (without whitespace), do you want to try a patch out for me? <wing> stu|laptop: eh, actually i found that gaim cannot <stu|laptop> cannot? at all? <wing> stu|laptop: it seems so <stu|laptop> even with this? <link to patch> <wing> stu|laptop: i'll check it out then :) ... ... <wing> stu|laptop: it works perfectly with the patch :) committer: Tailor Script <tailor@pidgin.im>
author Tim Ringenbach <marv@pidgin.im>
date Sun, 06 Jun 2004 19:06:22 +0000
parents f0488214826f
children 9171e528d7e5
comparison
equal deleted inserted replaced
9221:8054855f2bb9 9222:316b1afb5974
1354 gaim_markup_linkify(const char *text) 1354 gaim_markup_linkify(const char *text)
1355 { 1355 {
1356 const char *c, *t, *q = NULL; 1356 const char *c, *t, *q = NULL;
1357 char *tmp, *tmpurlbuf; 1357 char *tmp, *tmpurlbuf;
1358 char url_buf[BUF_LEN * 4]; 1358 char url_buf[BUF_LEN * 4];
1359 gunichar g;
1359 gboolean inside_html = FALSE; 1360 gboolean inside_html = FALSE;
1361 int inside_paren = 0;
1360 GString *ret = g_string_new(""); 1362 GString *ret = g_string_new("");
1361 /* Assumes you have a buffer able to cary at least BUF_LEN * 2 bytes */ 1363 /* Assumes you have a buffer able to cary at least BUF_LEN * 2 bytes */
1362 1364
1363 c = text; 1365 c = text;
1364 while (*c) { 1366 while (*c) {
1367
1368 if(*c == '(' && !inside_html) {
1369 inside_paren++;
1370 ret = g_string_append_c(ret, *c);
1371 c++;
1372 }
1373
1365 if(inside_html) { 1374 if(inside_html) {
1366 if(*c == '>') { 1375 if(*c == '>') {
1367 inside_html = FALSE; 1376 inside_html = FALSE;
1368 } else if(!q && (*c == '\"' || *c == '\'')) { 1377 } else if(!q && (*c == '\"' || *c == '\'')) {
1369 q = c; 1378 q = c;
1396 continue; 1405 continue;
1397 } 1406 }
1398 1407
1399 if (*(t - 1) == '.') 1408 if (*(t - 1) == '.')
1400 t--; 1409 t--;
1410 if ((*(t - 1) == ')' && (inside_paren > 0))) {
1411 t--;
1412 }
1401 strncpy(url_buf, c, t - c); 1413 strncpy(url_buf, c, t - c);
1402 url_buf[t - c] = 0; 1414 url_buf[t - c] = 0;
1403 tmpurlbuf = gaim_unescape_html(url_buf); 1415 tmpurlbuf = gaim_unescape_html(url_buf);
1404 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", 1416 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
1405 tmpurlbuf, url_buf); 1417 tmpurlbuf, url_buf);
1426 continue; 1438 continue;
1427 } 1439 }
1428 1440
1429 if (*(t - 1) == '.') 1441 if (*(t - 1) == '.')
1430 t--; 1442 t--;
1443 if ((*(t - 1) == ')' && (inside_paren > 0))) {
1444 t--;
1445 }
1431 strncpy(url_buf, c, t - c); 1446 strncpy(url_buf, c, t - c);
1432 url_buf[t - c] = 0; 1447 url_buf[t - c] = 0;
1433 tmpurlbuf = gaim_unescape_html(url_buf); 1448 tmpurlbuf = gaim_unescape_html(url_buf);
1434 g_string_append_printf(ret, 1449 g_string_append_printf(ret,
1435 "<A HREF=\"http://%s\">%s</A>", tmpurlbuf, 1450 "<A HREF=\"http://%s\">%s</A>", tmpurlbuf,
1447 t = c; 1462 t = c;
1448 while (1) { 1463 while (1) {
1449 if (badchar(*t)) { 1464 if (badchar(*t)) {
1450 if (*(t - 1) == '.') 1465 if (*(t - 1) == '.')
1451 t--; 1466 t--;
1467 if ((*(t - 1) == ')' && (inside_paren > 0))) {
1468 t--;
1469 }
1452 strncpy(url_buf, c, t - c); 1470 strncpy(url_buf, c, t - c);
1453 url_buf[t - c] = 0; 1471 url_buf[t - c] = 0;
1454 tmpurlbuf = gaim_unescape_html(url_buf); 1472 tmpurlbuf = gaim_unescape_html(url_buf);
1455 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", 1473 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
1456 tmpurlbuf, url_buf); 1474 tmpurlbuf, url_buf);
1471 if (t - c == 4) { 1489 if (t - c == 4) {
1472 break; 1490 break;
1473 } 1491 }
1474 if (*(t - 1) == '.') 1492 if (*(t - 1) == '.')
1475 t--; 1493 t--;
1494 if ((*(t - 1) == ')' && (inside_paren > 0))) {
1495 t--;
1496 }
1476 strncpy(url_buf, c, t - c); 1497 strncpy(url_buf, c, t - c);
1477 url_buf[t - c] = 0; 1498 url_buf[t - c] = 0;
1478 tmpurlbuf = gaim_unescape_html(url_buf); 1499 tmpurlbuf = gaim_unescape_html(url_buf);
1479 g_string_append_printf(ret, 1500 g_string_append_printf(ret,
1480 "<A HREF=\"ftp://%s\">%s</A>", tmpurlbuf, 1501 "<A HREF=\"ftp://%s\">%s</A>", tmpurlbuf,
1507 break; 1528 break;
1508 t++; 1529 t++;
1509 1530
1510 } 1531 }
1511 } else if (c != text && (*c == '@')) { 1532 } else if (c != text && (*c == '@')) {
1512 char *tmp;
1513 int flag; 1533 int flag;
1514 int len = 0; 1534 GString *gurl_buf;
1515 const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0"; 1535 const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";
1516 url_buf[0] = 0; 1536 url_buf[0] = 0;
1517 1537
1518 if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1))) 1538 if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
1519 flag = 0; 1539 flag = 0;
1520 else 1540 else
1521 flag = 1; 1541 flag = 1;
1522 1542
1523 t = c; 1543 t = c;
1544 gurl_buf = g_string_new("");
1524 while (flag) { 1545 while (flag) {
1525 if (badchar(*t)) { 1546 /* iterate backwards grabbing the local part of an email address */
1526 ret = g_string_truncate(ret, ret->len - (len - 1)); 1547 g = g_utf8_get_char(t);
1548 if (badchar(*t) || (g >= 127) || (*t == '(') ||
1549 ((*t == ';') && (t > (text+2)) && !g_ascii_strncasecmp(t - 3, "&lt;", 4))) {
1550 /* local part will already be part of ret, strip it out */
1551 ret = g_string_truncate(ret, ret->len - (c - t));
1552 ret = g_string_append_unichar(ret, g);
1527 break; 1553 break;
1528 } else { 1554 } else {
1529 len++; 1555 g_string_prepend_unichar(gurl_buf, g);
1530 tmp = g_malloc(len + 1); 1556 t = g_utf8_find_prev_char(text, t);
1531 tmp[len] = 0;
1532 tmp[0] = *t;
1533 strncpy(tmp + 1, url_buf, len - 1);
1534 strcpy(url_buf, tmp);
1535 url_buf[len] = 0;
1536 g_free(tmp);
1537 t--;
1538 if (t < text) { 1557 if (t < text) {
1539 ret = g_string_assign(ret, ""); 1558 ret = g_string_assign(ret, "");
1540 break; 1559 break;
1541 } 1560 }
1542 } 1561 }
1543 } 1562 }
1544 1563
1545 t = c + 1; 1564 t = g_utf8_find_next_char(c, NULL);
1546 1565
1547 while (flag) { 1566 while (flag) {
1548 if (badchar(*t)) { 1567 /* iterate forwards grabbing the domain part of an email address */
1568 g = g_utf8_get_char(t);
1569 if (badchar(*t) || (g >= 127) || (*t == ')') ||
1570 ((*t == '&') && !g_ascii_strncasecmp(t, "&gt;", 4))) {
1549 char *d; 1571 char *d;
1550 1572
1573 strcpy(url_buf, gurl_buf->str);
1574
1575 /* strip off trailing periods */
1551 for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--) 1576 for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
1552 *d = '\0'; 1577 *d = '\0';
1553 1578
1554 tmpurlbuf = gaim_unescape_html(url_buf); 1579 tmpurlbuf = gaim_unescape_html(url_buf);
1555 if (gaim_email_is_valid(tmpurlbuf)) { 1580 if (gaim_email_is_valid(tmpurlbuf)) {
1561 g_free(tmpurlbuf); 1586 g_free(tmpurlbuf);
1562 c = t; 1587 c = t;
1563 1588
1564 break; 1589 break;
1565 } else { 1590 } else {
1566 strncat(url_buf, t, 1); 1591 g_string_append_unichar(gurl_buf, g);
1567 len++; 1592 t = g_utf8_find_next_char(t, NULL);
1568 url_buf[len] = 0; 1593 }
1569 } 1594 }
1570 1595 g_string_free(gurl_buf, TRUE);
1571 t++; 1596 }
1572 } 1597
1598 if(*c == ')' && !inside_html) {
1599 inside_paren--;
1600 ret = g_string_append_c(ret, *c);
1601 c++;
1573 } 1602 }
1574 1603
1575 if (*c == 0) 1604 if (*c == 0)
1576 break; 1605 break;
1577 1606