Mercurial > emacs
comparison lisp/mail/mail-extr.el @ 56600:3e6748f33315
(mail-extr-disable-voodoo): New variable.
(mail-extr-voodoo): Check mail-extr-disable-voodoo.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Thu, 05 Aug 2004 00:15:15 +0000 |
parents | 7302d194fccb |
children | 57c8c9029251 3fd4a5c21153 |
comparison
equal
deleted
inserted
replaced
56599:35a105adc159 | 56600:3e6748f33315 |
---|---|
1432 (widen)) | 1432 (widen)) |
1433 ))) | 1433 ))) |
1434 (if all (nreverse value-list) (car value-list)) | 1434 (if all (nreverse value-list) (car value-list)) |
1435 )) | 1435 )) |
1436 | 1436 |
1437 (defcustom mail-extr-disable-voodoo "\\cj" | |
1438 "*If it is a regexp, names matching it will never be modified. | |
1439 If it is neither nil nor a string, modifying of names will never take | |
1440 place. It affects how `mail-extract-address-components' works." | |
1441 :type '(choice (regexp :size 0) | |
1442 (const :tag "Always enabled" nil) | |
1443 (const :tag "Always disabled" t)) | |
1444 :group 'mail-extr) | |
1445 | |
1437 (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer) | 1446 (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer) |
1438 (let ((word-count 0) | 1447 (unless (and mail-extr-disable-voodoo |
1439 (case-fold-search nil) | 1448 (or (not (stringp mail-extr-disable-voodoo)) |
1440 mixed-case-flag lower-case-flag ;;upper-case-flag | 1449 (progn |
1441 suffix-flag last-name-comma-flag | 1450 (goto-char (point-min)) |
1442 ;;cbeg cend | 1451 (re-search-forward mail-extr-disable-voodoo nil t)))) |
1443 initial | 1452 (let ((word-count 0) |
1444 begin-again-flag | 1453 (case-fold-search nil) |
1445 drop-this-word-if-trailing-flag | 1454 mixed-case-flag lower-case-flag ;;upper-case-flag |
1446 drop-last-word-if-trailing-flag | 1455 suffix-flag last-name-comma-flag |
1447 word-found-flag | 1456 ;;cbeg cend |
1448 this-word-beg last-word-beg | 1457 initial |
1449 name-beg name-end | 1458 begin-again-flag |
1450 name-done-flag | 1459 drop-this-word-if-trailing-flag |
1451 ) | 1460 drop-last-word-if-trailing-flag |
1452 (save-excursion | 1461 word-found-flag |
1453 (set-syntax-table mail-extr-address-text-syntax-table) | 1462 this-word-beg last-word-beg |
1454 | 1463 name-beg name-end |
1455 ;; Get rid of comments. | 1464 name-done-flag |
1456 (goto-char (point-min)) | 1465 ) |
1457 (while (not (eobp)) | 1466 (save-excursion |
1458 ;; Initialize for this iteration of the loop. | 1467 (set-syntax-table mail-extr-address-text-syntax-table) |
1459 (skip-chars-forward "^({[\"'`") | 1468 |
1460 (let ((cbeg (point))) | 1469 ;; Get rid of comments. |
1461 (set-syntax-table mail-extr-address-text-comment-syntax-table) | |
1462 (if (memq (following-char) '(?\' ?\`)) | |
1463 (search-forward "'" nil 'move | |
1464 (if (eq ?\' (following-char)) 2 1)) | |
1465 (or (mail-extr-safe-move-sexp 1) | |
1466 (goto-char (point-max)))) | |
1467 (set-syntax-table mail-extr-address-text-syntax-table) | |
1468 (when (eq (char-after cbeg) ?\() | |
1469 ;; Delete the comment itself. | |
1470 (delete-region cbeg (point)) | |
1471 ;; Canonicalize whitespace where the comment was. | |
1472 (skip-chars-backward " \t") | |
1473 (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)") | |
1474 (replace-match "") | |
1475 (setq cbeg (point)) | |
1476 (skip-chars-forward " \t") | |
1477 (if (bobp) | |
1478 (delete-region (point) cbeg) | |
1479 (just-one-space)))))) | |
1480 | |
1481 ;; This was moved above. | |
1482 ;; Fix . used as space | |
1483 ;; But it belongs here because it occurs not only as | |
1484 ;; rypens@reks.uia.ac.be (Piet.Rypens) | |
1485 ;; but also as | |
1486 ;; "Piet.Rypens" <rypens@reks.uia.ac.be> | |
1487 ;;(goto-char (point-min)) | |
1488 ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t) | |
1489 ;; (replace-match "\\1 \\2" t)) | |
1490 | |
1491 (unless (search-forward " " nil t) | |
1492 (goto-char (point-min)) | 1470 (goto-char (point-min)) |
1493 (cond ((search-forward "_" nil t) | 1471 (while (not (eobp)) |
1494 ;; Handle the *idiotic* use of underlines as spaces. | 1472 ;; Initialize for this iteration of the loop. |
1495 ;; Example: fml@foo.bar.dom (First_M._Last) | 1473 (skip-chars-forward "^({[\"'`") |
1496 (goto-char (point-min)) | 1474 (let ((cbeg (point))) |
1497 (while (search-forward "_" nil t) | 1475 (set-syntax-table mail-extr-address-text-comment-syntax-table) |
1498 (replace-match " " t))) | 1476 (if (memq (following-char) '(?\' ?\`)) |
1499 ((search-forward "." nil t) | 1477 (search-forward "'" nil 'move |
1500 ;; Fix . used as space | 1478 (if (eq ?\' (following-char)) 2 1)) |
1501 ;; Example: danj1@cb.att.com (daniel.jacobson) | 1479 (or (mail-extr-safe-move-sexp 1) |
1502 (goto-char (point-min)) | 1480 (goto-char (point-max)))) |
1503 (while (re-search-forward mail-extr-bad-dot-pattern nil t) | 1481 (set-syntax-table mail-extr-address-text-syntax-table) |
1504 (replace-match "\\1 \\2" t))))) | 1482 (when (eq (char-after cbeg) ?\() |
1505 | 1483 ;; Delete the comment itself. |
1506 ;; Loop over the words (and other junk) in the name. | 1484 (delete-region cbeg (point)) |
1507 (goto-char (point-min)) | 1485 ;; Canonicalize whitespace where the comment was. |
1508 (while (not name-done-flag) | 1486 (skip-chars-backward " \t") |
1509 | 1487 (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)") |
1510 (when word-found-flag | 1488 (replace-match "") |
1511 ;; Last time through this loop we skipped over a word. | 1489 (setq cbeg (point)) |
1512 (setq last-word-beg this-word-beg) | 1490 (skip-chars-forward " \t") |
1513 (setq drop-last-word-if-trailing-flag | 1491 (if (bobp) |
1514 drop-this-word-if-trailing-flag) | 1492 (delete-region (point) cbeg) |
1515 (setq word-found-flag nil)) | 1493 (just-one-space)))))) |
1516 | 1494 |
1517 (when begin-again-flag | 1495 ;; This was moved above. |
1518 ;; Last time through the loop we found something that | 1496 ;; Fix . used as space |
1519 ;; indicates we should pretend we are beginning again from | 1497 ;; But it belongs here because it occurs not only as |
1520 ;; the start. | 1498 ;; rypens@reks.uia.ac.be (Piet.Rypens) |
1521 (setq word-count 0) | 1499 ;; but also as |
1522 (setq last-word-beg nil) | 1500 ;; "Piet.Rypens" <rypens@reks.uia.ac.be> |
1523 (setq drop-last-word-if-trailing-flag nil) | 1501 ;;(goto-char (point-min)) |
1524 (setq mixed-case-flag nil) | 1502 ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t) |
1525 (setq lower-case-flag nil) | 1503 ;; (replace-match "\\1 \\2" t)) |
1526 ;; (setq upper-case-flag nil) | 1504 |
1527 (setq begin-again-flag nil)) | 1505 (unless (search-forward " " nil t) |
1528 | 1506 (goto-char (point-min)) |
1529 ;; Initialize for this iteration of the loop. | 1507 (cond ((search-forward "_" nil t) |
1530 (mail-extr-skip-whitespace-forward) | 1508 ;; Handle the *idiotic* use of underlines as spaces. |
1531 (if (eq word-count 0) (narrow-to-region (point) (point-max))) | 1509 ;; Example: fml@foo.bar.dom (First_M._Last) |
1532 (setq this-word-beg (point)) | 1510 (goto-char (point-min)) |
1533 (setq drop-this-word-if-trailing-flag nil) | 1511 (while (search-forward "_" nil t) |
1534 | 1512 (replace-match " " t))) |
1535 ;; Decide what to do based on what we are looking at. | 1513 ((search-forward "." nil t) |
1536 (cond | 1514 ;; Fix . used as space |
1537 | 1515 ;; Example: danj1@cb.att.com (daniel.jacobson) |
1538 ;; Delete title | 1516 (goto-char (point-min)) |
1539 ((and (eq word-count 0) | 1517 (while (re-search-forward mail-extr-bad-dot-pattern nil t) |
1540 (looking-at mail-extr-full-name-prefixes)) | 1518 (replace-match "\\1 \\2" t))))) |
1541 (goto-char (match-end 0)) | 1519 |
1520 ;; Loop over the words (and other junk) in the name. | |
1521 (goto-char (point-min)) | |
1522 (while (not name-done-flag) | |
1523 | |
1524 (when word-found-flag | |
1525 ;; Last time through this loop we skipped over a word. | |
1526 (setq last-word-beg this-word-beg) | |
1527 (setq drop-last-word-if-trailing-flag | |
1528 drop-this-word-if-trailing-flag) | |
1529 (setq word-found-flag nil)) | |
1530 | |
1531 (when begin-again-flag | |
1532 ;; Last time through the loop we found something that | |
1533 ;; indicates we should pretend we are beginning again from | |
1534 ;; the start. | |
1535 (setq word-count 0) | |
1536 (setq last-word-beg nil) | |
1537 (setq drop-last-word-if-trailing-flag nil) | |
1538 (setq mixed-case-flag nil) | |
1539 (setq lower-case-flag nil) | |
1540 ;; (setq upper-case-flag nil) | |
1541 (setq begin-again-flag nil)) | |
1542 | |
1543 ;; Initialize for this iteration of the loop. | |
1544 (mail-extr-skip-whitespace-forward) | |
1545 (if (eq word-count 0) (narrow-to-region (point) (point-max))) | |
1546 (setq this-word-beg (point)) | |
1547 (setq drop-this-word-if-trailing-flag nil) | |
1548 | |
1549 ;; Decide what to do based on what we are looking at. | |
1550 (cond | |
1551 | |
1552 ;; Delete title | |
1553 ((and (eq word-count 0) | |
1554 (looking-at mail-extr-full-name-prefixes)) | |
1555 (goto-char (match-end 0)) | |
1556 (narrow-to-region (point) (point-max))) | |
1557 | |
1558 ;; Stop after name suffix | |
1559 ((and (>= word-count 2) | |
1560 (looking-at mail-extr-full-name-suffix-pattern)) | |
1561 (mail-extr-skip-whitespace-backward) | |
1562 (setq suffix-flag (point)) | |
1563 (if (eq ?, (following-char)) | |
1564 (forward-char 1) | |
1565 (insert ?,)) | |
1566 ;; Enforce at least one space after comma | |
1567 (or (eq ?\ (following-char)) | |
1568 (insert ?\ )) | |
1569 (mail-extr-skip-whitespace-forward) | |
1570 (cond ((memq (following-char) '(?j ?J ?s ?S)) | |
1571 (capitalize-word 1) | |
1572 (if (eq (following-char) ?.) | |
1573 (forward-char 1) | |
1574 (insert ?.))) | |
1575 (t | |
1576 (upcase-word 1))) | |
1577 (setq word-found-flag t) | |
1578 (setq name-done-flag t)) | |
1579 | |
1580 ;; Handle SCA names | |
1581 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As" | |
1582 (goto-char (match-beginning 1)) | |
1583 (narrow-to-region (point) (point-max)) | |
1584 (setq begin-again-flag t)) | |
1585 | |
1586 ;; Check for initial last name followed by comma | |
1587 ((and (eq ?, (following-char)) | |
1588 (eq word-count 1)) | |
1589 (forward-char 1) | |
1590 (setq last-name-comma-flag t) | |
1591 (or (eq ?\ (following-char)) | |
1592 (insert ?\ ))) | |
1593 | |
1594 ;; Stop before trailing comma-separated comment | |
1595 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. | |
1596 ;; *** This case is redundant??? | |
1597 ;;((eq ?, (following-char)) | |
1598 ;; (setq name-done-flag t)) | |
1599 | |
1600 ;; Delete parenthesized/quoted comment/nickname | |
1601 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`)) | |
1602 (setq cbeg (point)) | |
1603 (set-syntax-table mail-extr-address-text-comment-syntax-table) | |
1604 (cond ((memq (following-char) '(?\' ?\`)) | |
1605 (or (search-forward "'" nil t | |
1606 (if (eq ?\' (following-char)) 2 1)) | |
1607 (delete-char 1))) | |
1608 (t | |
1609 (or (mail-extr-safe-move-sexp 1) | |
1610 (goto-char (point-max))))) | |
1611 (set-syntax-table mail-extr-address-text-syntax-table) | |
1612 (setq cend (point)) | |
1613 (cond | |
1614 ;; Handle case of entire name being quoted | |
1615 ((and (eq word-count 0) | |
1616 (looking-at " *\\'") | |
1617 (>= (- cend cbeg) 2)) | |
1618 (narrow-to-region (1+ cbeg) (1- cend)) | |
1619 (goto-char (point-min))) | |
1620 (t | |
1621 ;; Handle case of quoted initial | |
1622 (if (and (or (= 3 (- cend cbeg)) | |
1623 (and (= 4 (- cend cbeg)) | |
1624 (eq ?. (char-after (+ 2 cbeg))))) | |
1625 (not (looking-at " *\\'"))) | |
1626 (setq initial (char-after (1+ cbeg))) | |
1627 (setq initial nil)) | |
1628 (delete-region cbeg cend) | |
1629 (if initial | |
1630 (insert initial ". "))))) | |
1631 | |
1632 ;; Handle *Stupid* VMS date stamps | |
1633 ((looking-at mail-extr-stupid-vms-date-stamp-pattern) | |
1634 (replace-match "" t)) | |
1635 | |
1636 ;; Handle Chinese characters. | |
1637 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern) | |
1638 (goto-char (match-end 0)) | |
1639 (setq word-found-flag t)) | |
1640 | |
1641 ;; Skip initial garbage characters. | |
1642 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. | |
1643 ((and (eq word-count 0) | |
1644 (looking-at mail-extr-leading-garbage)) | |
1645 (goto-char (match-end 0)) | |
1646 ;; *** Skip backward over these??? | |
1647 ;; (skip-chars-backward "& \"") | |
1648 (narrow-to-region (point) (point-max))) | |
1649 | |
1650 ;; Various stopping points | |
1651 ((or | |
1652 | |
1653 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case | |
1654 ;; words. Example: XT-DEM. | |
1655 (and (>= word-count 2) | |
1656 mixed-case-flag | |
1657 (looking-at mail-extr-weird-acronym-pattern) | |
1658 (not (looking-at mail-extr-roman-numeral-pattern))) | |
1659 | |
1660 ;; Stop before trailing alternative address | |
1661 (looking-at mail-extr-alternative-address-pattern) | |
1662 | |
1663 ;; Stop before trailing comment not introduced by comma | |
1664 ;; THIS CASE MUST BE AFTER AN EARLIER CASE. | |
1665 (looking-at mail-extr-trailing-comment-start-pattern) | |
1666 | |
1667 ;; Stop before telephone numbers | |
1668 (and (>= word-count 1) | |
1669 (looking-at mail-extr-telephone-extension-pattern))) | |
1670 (setq name-done-flag t)) | |
1671 | |
1672 ;; Delete ham radio call signs | |
1673 ((looking-at mail-extr-ham-call-sign-pattern) | |
1674 (delete-region (match-beginning 0) (match-end 0))) | |
1675 | |
1676 ;; Fixup initials | |
1677 ((looking-at mail-extr-initial-pattern) | |
1678 (or (eq (following-char) (upcase (following-char))) | |
1679 (setq lower-case-flag t)) | |
1680 (forward-char 1) | |
1681 (if (eq ?. (following-char)) | |
1682 (forward-char 1) | |
1683 (insert ?.)) | |
1684 (or (eq ?\ (following-char)) | |
1685 (insert ?\ )) | |
1686 (setq word-found-flag t)) | |
1687 | |
1688 ;; Handle BITNET LISTSERV list names. | |
1689 ((and (eq word-count 0) | |
1690 (looking-at mail-extr-listserv-list-name-pattern)) | |
1691 (narrow-to-region (match-beginning 1) (match-end 1)) | |
1692 (setq word-found-flag t) | |
1693 (setq name-done-flag t)) | |
1694 | |
1695 ;; Handle & substitution, when & is last and is not first. | |
1696 ((and (> word-count 0) | |
1697 (eq ?\ (preceding-char)) | |
1698 (eq (following-char) ?&) | |
1699 (eq (1+ (point)) (point-max))) | |
1700 (delete-char 1) | |
1701 (capitalize-region | |
1702 (point) | |
1703 (progn | |
1704 (insert-buffer-substring canonicalization-buffer | |
1705 mbox-beg mbox-end) | |
1706 (point))) | |
1707 (setq disable-initial-guessing-flag t) | |
1708 (setq word-found-flag t)) | |
1709 | |
1710 ;; Handle & between names, as in "Bob & Susie". | |
1711 ((and (> word-count 0) (eq (following-char) ?\&)) | |
1712 (setq name-beg (point)) | |
1713 (setq name-end (1+ name-beg)) | |
1714 (setq word-found-flag t) | |
1715 (goto-char name-end)) | |
1716 | |
1717 ;; Regular name words | |
1718 ((looking-at mail-extr-name-pattern) | |
1719 (setq name-beg (point)) | |
1720 (setq name-end (match-end 0)) | |
1721 | |
1722 ;; Certain words will be dropped if they are at the end. | |
1723 (and (>= word-count 2) | |
1724 (not lower-case-flag) | |
1725 (or | |
1726 ;; Trailing 4-or-more letter lowercase words preceded by | |
1727 ;; mixed case or uppercase words will be dropped. | |
1728 (looking-at "[[:lower:]]\\{4,\\}[ \t]*\\'") | |
1729 ;; Drop a trailing word which is terminated with a period. | |
1730 (eq ?. (char-after (1- name-end)))) | |
1731 (setq drop-this-word-if-trailing-flag t)) | |
1732 | |
1733 ;; Set the flags that indicate whether we have seen a lowercase | |
1734 ;; word, a mixed case word, and an uppercase word. | |
1735 (if (re-search-forward "[[:lower:]]" name-end t) | |
1736 (if (progn | |
1737 (goto-char name-beg) | |
1738 (re-search-forward "[[:upper:]]" name-end t)) | |
1739 (setq mixed-case-flag t) | |
1740 (setq lower-case-flag t)) | |
1741 ;; (setq upper-case-flag t) | |
1742 ) | |
1743 | |
1744 (goto-char name-end) | |
1745 (setq word-found-flag t)) | |
1746 | |
1747 ;; Allow a number as a word, if it doesn't mean anything else. | |
1748 ((looking-at "[0-9]+\\>") | |
1749 (setq name-beg (point)) | |
1750 (setq name-end (match-end 0)) | |
1751 (goto-char name-end) | |
1752 (setq word-found-flag t)) | |
1753 | |
1754 (t | |
1755 (setq name-done-flag t) | |
1756 )) | |
1757 | |
1758 ;; Count any word that we skipped over. | |
1759 (if word-found-flag | |
1760 (setq word-count (1+ word-count)))) | |
1761 | |
1762 ;; If the last thing in the name is 2 or more periods, or one or more | |
1763 ;; other sentence terminators (but not a single period) then keep them | |
1764 ;; and the preceding word. This is for the benefit of whole sentences | |
1765 ;; in the name field: it's better behavior than dropping the last word | |
1766 ;; of the sentence... | |
1767 (if (and (not suffix-flag) | |
1768 (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'")) | |
1769 (goto-char (setq suffix-flag (point-max)))) | |
1770 | |
1771 ;; Drop everything after point and certain trailing words. | |
1772 (narrow-to-region (point-min) | |
1773 (or (and drop-last-word-if-trailing-flag | |
1774 last-word-beg) | |
1775 (point))) | |
1776 | |
1777 ;; Xerox's mailers SUCK!!!!!! | |
1778 ;; We simply refuse to believe that any last name is PARC or ADOC. | |
1779 ;; If it looks like that is the last name, that there is no meaningful | |
1780 ;; here at all. Actually I guess it would be best to map patterns | |
1781 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't | |
1782 ;; actually know that that is what's going on. | |
1783 (unless suffix-flag | |
1784 (goto-char (point-min)) | |
1785 (let ((case-fold-search t)) | |
1786 (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'") | |
1787 (erase-buffer)))) | |
1788 | |
1789 ;; If last name first put it at end (but before suffix) | |
1790 (when last-name-comma-flag | |
1791 (goto-char (point-min)) | |
1792 (search-forward ",") | |
1793 (setq name-end (1- (point))) | |
1794 (goto-char (or suffix-flag (point-max))) | |
1795 (or (eq ?\ (preceding-char)) | |
1796 (insert ?\ )) | |
1797 (insert-buffer-substring (current-buffer) (point-min) name-end) | |
1798 (goto-char name-end) | |
1799 (skip-chars-forward "\t ,") | |
1542 (narrow-to-region (point) (point-max))) | 1800 (narrow-to-region (point) (point-max))) |
1543 | 1801 |
1544 ;; Stop after name suffix | 1802 ;; Delete leading and trailing junk characters. |
1545 ((and (>= word-count 2) | 1803 ;; *** This is probably completely unneeded now. |
1546 (looking-at mail-extr-full-name-suffix-pattern)) | 1804 ;;(goto-char (point-max)) |
1547 (mail-extr-skip-whitespace-backward) | 1805 ;;(skip-chars-backward mail-extr-non-end-name-chars) |
1548 (setq suffix-flag (point)) | 1806 ;;(if (eq ?. (following-char)) |
1549 (if (eq ?, (following-char)) | 1807 ;; (forward-char 1)) |
1550 (forward-char 1) | 1808 ;;(narrow-to-region (point) |
1551 (insert ?,)) | 1809 ;; (progn |
1552 ;; Enforce at least one space after comma | 1810 ;; (goto-char (point-min)) |
1553 (or (eq ?\ (following-char)) | 1811 ;; (skip-chars-forward mail-extr-non-begin-name-chars) |
1554 (insert ?\ )) | 1812 ;; (point))) |
1555 (mail-extr-skip-whitespace-forward) | 1813 |
1556 (cond ((memq (following-char) '(?j ?J ?s ?S)) | 1814 ;; Compress whitespace |
1557 (capitalize-word 1) | |
1558 (if (eq (following-char) ?.) | |
1559 (forward-char 1) | |
1560 (insert ?.))) | |
1561 (t | |
1562 (upcase-word 1))) | |
1563 (setq word-found-flag t) | |
1564 (setq name-done-flag t)) | |
1565 | |
1566 ;; Handle SCA names | |
1567 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As" | |
1568 (goto-char (match-beginning 1)) | |
1569 (narrow-to-region (point) (point-max)) | |
1570 (setq begin-again-flag t)) | |
1571 | |
1572 ;; Check for initial last name followed by comma | |
1573 ((and (eq ?, (following-char)) | |
1574 (eq word-count 1)) | |
1575 (forward-char 1) | |
1576 (setq last-name-comma-flag t) | |
1577 (or (eq ?\ (following-char)) | |
1578 (insert ?\ ))) | |
1579 | |
1580 ;; Stop before trailing comma-separated comment | |
1581 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. | |
1582 ;; *** This case is redundant??? | |
1583 ;;((eq ?, (following-char)) | |
1584 ;; (setq name-done-flag t)) | |
1585 | |
1586 ;; Delete parenthesized/quoted comment/nickname | |
1587 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`)) | |
1588 (setq cbeg (point)) | |
1589 (set-syntax-table mail-extr-address-text-comment-syntax-table) | |
1590 (cond ((memq (following-char) '(?\' ?\`)) | |
1591 (or (search-forward "'" nil t | |
1592 (if (eq ?\' (following-char)) 2 1)) | |
1593 (delete-char 1))) | |
1594 (t | |
1595 (or (mail-extr-safe-move-sexp 1) | |
1596 (goto-char (point-max))))) | |
1597 (set-syntax-table mail-extr-address-text-syntax-table) | |
1598 (setq cend (point)) | |
1599 (cond | |
1600 ;; Handle case of entire name being quoted | |
1601 ((and (eq word-count 0) | |
1602 (looking-at " *\\'") | |
1603 (>= (- cend cbeg) 2)) | |
1604 (narrow-to-region (1+ cbeg) (1- cend)) | |
1605 (goto-char (point-min))) | |
1606 (t | |
1607 ;; Handle case of quoted initial | |
1608 (if (and (or (= 3 (- cend cbeg)) | |
1609 (and (= 4 (- cend cbeg)) | |
1610 (eq ?. (char-after (+ 2 cbeg))))) | |
1611 (not (looking-at " *\\'"))) | |
1612 (setq initial (char-after (1+ cbeg))) | |
1613 (setq initial nil)) | |
1614 (delete-region cbeg cend) | |
1615 (if initial | |
1616 (insert initial ". "))))) | |
1617 | |
1618 ;; Handle *Stupid* VMS date stamps | |
1619 ((looking-at mail-extr-stupid-vms-date-stamp-pattern) | |
1620 (replace-match "" t)) | |
1621 | |
1622 ;; Handle Chinese characters. | |
1623 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern) | |
1624 (goto-char (match-end 0)) | |
1625 (setq word-found-flag t)) | |
1626 | |
1627 ;; Skip initial garbage characters. | |
1628 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. | |
1629 ((and (eq word-count 0) | |
1630 (looking-at mail-extr-leading-garbage)) | |
1631 (goto-char (match-end 0)) | |
1632 ;; *** Skip backward over these??? | |
1633 ;; (skip-chars-backward "& \"") | |
1634 (narrow-to-region (point) (point-max))) | |
1635 | |
1636 ;; Various stopping points | |
1637 ((or | |
1638 | |
1639 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case | |
1640 ;; words. Example: XT-DEM. | |
1641 (and (>= word-count 2) | |
1642 mixed-case-flag | |
1643 (looking-at mail-extr-weird-acronym-pattern) | |
1644 (not (looking-at mail-extr-roman-numeral-pattern))) | |
1645 | |
1646 ;; Stop before trailing alternative address | |
1647 (looking-at mail-extr-alternative-address-pattern) | |
1648 | |
1649 ;; Stop before trailing comment not introduced by comma | |
1650 ;; THIS CASE MUST BE AFTER AN EARLIER CASE. | |
1651 (looking-at mail-extr-trailing-comment-start-pattern) | |
1652 | |
1653 ;; Stop before telephone numbers | |
1654 (and (>= word-count 1) | |
1655 (looking-at mail-extr-telephone-extension-pattern))) | |
1656 (setq name-done-flag t)) | |
1657 | |
1658 ;; Delete ham radio call signs | |
1659 ((looking-at mail-extr-ham-call-sign-pattern) | |
1660 (delete-region (match-beginning 0) (match-end 0))) | |
1661 | |
1662 ;; Fixup initials | |
1663 ((looking-at mail-extr-initial-pattern) | |
1664 (or (eq (following-char) (upcase (following-char))) | |
1665 (setq lower-case-flag t)) | |
1666 (forward-char 1) | |
1667 (if (eq ?. (following-char)) | |
1668 (forward-char 1) | |
1669 (insert ?.)) | |
1670 (or (eq ?\ (following-char)) | |
1671 (insert ?\ )) | |
1672 (setq word-found-flag t)) | |
1673 | |
1674 ;; Handle BITNET LISTSERV list names. | |
1675 ((and (eq word-count 0) | |
1676 (looking-at mail-extr-listserv-list-name-pattern)) | |
1677 (narrow-to-region (match-beginning 1) (match-end 1)) | |
1678 (setq word-found-flag t) | |
1679 (setq name-done-flag t)) | |
1680 | |
1681 ;; Handle & substitution, when & is last and is not first. | |
1682 ((and (> word-count 0) | |
1683 (eq ?\ (preceding-char)) | |
1684 (eq (following-char) ?&) | |
1685 (eq (1+ (point)) (point-max))) | |
1686 (delete-char 1) | |
1687 (capitalize-region | |
1688 (point) | |
1689 (progn | |
1690 (insert-buffer-substring canonicalization-buffer | |
1691 mbox-beg mbox-end) | |
1692 (point))) | |
1693 (setq disable-initial-guessing-flag t) | |
1694 (setq word-found-flag t)) | |
1695 | |
1696 ;; Handle & between names, as in "Bob & Susie". | |
1697 ((and (> word-count 0) (eq (following-char) ?\&)) | |
1698 (setq name-beg (point)) | |
1699 (setq name-end (1+ name-beg)) | |
1700 (setq word-found-flag t) | |
1701 (goto-char name-end)) | |
1702 | |
1703 ;; Regular name words | |
1704 ((looking-at mail-extr-name-pattern) | |
1705 (setq name-beg (point)) | |
1706 (setq name-end (match-end 0)) | |
1707 | |
1708 ;; Certain words will be dropped if they are at the end. | |
1709 (and (>= word-count 2) | |
1710 (not lower-case-flag) | |
1711 (or | |
1712 ;; Trailing 4-or-more letter lowercase words preceded by | |
1713 ;; mixed case or uppercase words will be dropped. | |
1714 (looking-at "[[:lower:]]\\{4,\\}[ \t]*\\'") | |
1715 ;; Drop a trailing word which is terminated with a period. | |
1716 (eq ?. (char-after (1- name-end)))) | |
1717 (setq drop-this-word-if-trailing-flag t)) | |
1718 | |
1719 ;; Set the flags that indicate whether we have seen a lowercase | |
1720 ;; word, a mixed case word, and an uppercase word. | |
1721 (if (re-search-forward "[[:lower:]]" name-end t) | |
1722 (if (progn | |
1723 (goto-char name-beg) | |
1724 (re-search-forward "[[:upper:]]" name-end t)) | |
1725 (setq mixed-case-flag t) | |
1726 (setq lower-case-flag t)) | |
1727 ;; (setq upper-case-flag t) | |
1728 ) | |
1729 | |
1730 (goto-char name-end) | |
1731 (setq word-found-flag t)) | |
1732 | |
1733 ;; Allow a number as a word, if it doesn't mean anything else. | |
1734 ((looking-at "[0-9]+\\>") | |
1735 (setq name-beg (point)) | |
1736 (setq name-end (match-end 0)) | |
1737 (goto-char name-end) | |
1738 (setq word-found-flag t)) | |
1739 | |
1740 (t | |
1741 (setq name-done-flag t) | |
1742 )) | |
1743 | |
1744 ;; Count any word that we skipped over. | |
1745 (if word-found-flag | |
1746 (setq word-count (1+ word-count)))) | |
1747 | |
1748 ;; If the last thing in the name is 2 or more periods, or one or more | |
1749 ;; other sentence terminators (but not a single period) then keep them | |
1750 ;; and the preceding word. This is for the benefit of whole sentences | |
1751 ;; in the name field: it's better behavior than dropping the last word | |
1752 ;; of the sentence... | |
1753 (if (and (not suffix-flag) | |
1754 (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'")) | |
1755 (goto-char (setq suffix-flag (point-max)))) | |
1756 | |
1757 ;; Drop everything after point and certain trailing words. | |
1758 (narrow-to-region (point-min) | |
1759 (or (and drop-last-word-if-trailing-flag | |
1760 last-word-beg) | |
1761 (point))) | |
1762 | |
1763 ;; Xerox's mailers SUCK!!!!!! | |
1764 ;; We simply refuse to believe that any last name is PARC or ADOC. | |
1765 ;; If it looks like that is the last name, that there is no meaningful | |
1766 ;; here at all. Actually I guess it would be best to map patterns | |
1767 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't | |
1768 ;; actually know that that is what's going on. | |
1769 (unless suffix-flag | |
1770 (goto-char (point-min)) | 1815 (goto-char (point-min)) |
1771 (let ((case-fold-search t)) | 1816 (while (re-search-forward "[ \t\n]+" nil t) |
1772 (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'") | 1817 (replace-match (if (eobp) "" " ") t)) |
1773 (erase-buffer)))) | 1818 )))) |
1774 | |
1775 ;; If last name first put it at end (but before suffix) | |
1776 (when last-name-comma-flag | |
1777 (goto-char (point-min)) | |
1778 (search-forward ",") | |
1779 (setq name-end (1- (point))) | |
1780 (goto-char (or suffix-flag (point-max))) | |
1781 (or (eq ?\ (preceding-char)) | |
1782 (insert ?\ )) | |
1783 (insert-buffer-substring (current-buffer) (point-min) name-end) | |
1784 (goto-char name-end) | |
1785 (skip-chars-forward "\t ,") | |
1786 (narrow-to-region (point) (point-max))) | |
1787 | |
1788 ;; Delete leading and trailing junk characters. | |
1789 ;; *** This is probably completely unneeded now. | |
1790 ;;(goto-char (point-max)) | |
1791 ;;(skip-chars-backward mail-extr-non-end-name-chars) | |
1792 ;;(if (eq ?. (following-char)) | |
1793 ;; (forward-char 1)) | |
1794 ;;(narrow-to-region (point) | |
1795 ;; (progn | |
1796 ;; (goto-char (point-min)) | |
1797 ;; (skip-chars-forward mail-extr-non-begin-name-chars) | |
1798 ;; (point))) | |
1799 | |
1800 ;; Compress whitespace | |
1801 (goto-char (point-min)) | |
1802 (while (re-search-forward "[ \t\n]+" nil t) | |
1803 (replace-match (if (eobp) "" " ") t)) | |
1804 ))) | |
1805 | 1819 |
1806 | 1820 |
1807 | 1821 |
1808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 1822 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
1809 ;; | 1823 ;; |