comparison lisp/mail/mail-extr.el @ 56600:3e6748f33315

(mail-extr-disable-voodoo): New variable. (mail-extr-voodoo): Check mail-extr-disable-voodoo.
author Kenichi Handa <handa@m17n.org>
date Thu, 05 Aug 2004 00:15:15 +0000
parents 7302d194fccb
children 57c8c9029251 3fd4a5c21153
comparison
equal deleted inserted replaced
56599:35a105adc159 56600:3e6748f33315
1432 (widen)) 1432 (widen))
1433 ))) 1433 )))
1434 (if all (nreverse value-list) (car value-list)) 1434 (if all (nreverse value-list) (car value-list))
1435 )) 1435 ))
1436 1436
1437 (defcustom mail-extr-disable-voodoo "\\cj"
1438 "*If it is a regexp, names matching it will never be modified.
1439 If it is neither nil nor a string, modifying of names will never take
1440 place. It affects how `mail-extract-address-components' works."
1441 :type '(choice (regexp :size 0)
1442 (const :tag "Always enabled" nil)
1443 (const :tag "Always disabled" t))
1444 :group 'mail-extr)
1445
1437 (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer) 1446 (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer)
1438 (let ((word-count 0) 1447 (unless (and mail-extr-disable-voodoo
1439 (case-fold-search nil) 1448 (or (not (stringp mail-extr-disable-voodoo))
1440 mixed-case-flag lower-case-flag ;;upper-case-flag 1449 (progn
1441 suffix-flag last-name-comma-flag 1450 (goto-char (point-min))
1442 ;;cbeg cend 1451 (re-search-forward mail-extr-disable-voodoo nil t))))
1443 initial 1452 (let ((word-count 0)
1444 begin-again-flag 1453 (case-fold-search nil)
1445 drop-this-word-if-trailing-flag 1454 mixed-case-flag lower-case-flag ;;upper-case-flag
1446 drop-last-word-if-trailing-flag 1455 suffix-flag last-name-comma-flag
1447 word-found-flag 1456 ;;cbeg cend
1448 this-word-beg last-word-beg 1457 initial
1449 name-beg name-end 1458 begin-again-flag
1450 name-done-flag 1459 drop-this-word-if-trailing-flag
1451 ) 1460 drop-last-word-if-trailing-flag
1452 (save-excursion 1461 word-found-flag
1453 (set-syntax-table mail-extr-address-text-syntax-table) 1462 this-word-beg last-word-beg
1454 1463 name-beg name-end
1455 ;; Get rid of comments. 1464 name-done-flag
1456 (goto-char (point-min)) 1465 )
1457 (while (not (eobp)) 1466 (save-excursion
1458 ;; Initialize for this iteration of the loop. 1467 (set-syntax-table mail-extr-address-text-syntax-table)
1459 (skip-chars-forward "^({[\"'`") 1468
1460 (let ((cbeg (point))) 1469 ;; Get rid of comments.
1461 (set-syntax-table mail-extr-address-text-comment-syntax-table)
1462 (if (memq (following-char) '(?\' ?\`))
1463 (search-forward "'" nil 'move
1464 (if (eq ?\' (following-char)) 2 1))
1465 (or (mail-extr-safe-move-sexp 1)
1466 (goto-char (point-max))))
1467 (set-syntax-table mail-extr-address-text-syntax-table)
1468 (when (eq (char-after cbeg) ?\()
1469 ;; Delete the comment itself.
1470 (delete-region cbeg (point))
1471 ;; Canonicalize whitespace where the comment was.
1472 (skip-chars-backward " \t")
1473 (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)")
1474 (replace-match "")
1475 (setq cbeg (point))
1476 (skip-chars-forward " \t")
1477 (if (bobp)
1478 (delete-region (point) cbeg)
1479 (just-one-space))))))
1480
1481 ;; This was moved above.
1482 ;; Fix . used as space
1483 ;; But it belongs here because it occurs not only as
1484 ;; rypens@reks.uia.ac.be (Piet.Rypens)
1485 ;; but also as
1486 ;; "Piet.Rypens" <rypens@reks.uia.ac.be>
1487 ;;(goto-char (point-min))
1488 ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t)
1489 ;; (replace-match "\\1 \\2" t))
1490
1491 (unless (search-forward " " nil t)
1492 (goto-char (point-min)) 1470 (goto-char (point-min))
1493 (cond ((search-forward "_" nil t) 1471 (while (not (eobp))
1494 ;; Handle the *idiotic* use of underlines as spaces. 1472 ;; Initialize for this iteration of the loop.
1495 ;; Example: fml@foo.bar.dom (First_M._Last) 1473 (skip-chars-forward "^({[\"'`")
1496 (goto-char (point-min)) 1474 (let ((cbeg (point)))
1497 (while (search-forward "_" nil t) 1475 (set-syntax-table mail-extr-address-text-comment-syntax-table)
1498 (replace-match " " t))) 1476 (if (memq (following-char) '(?\' ?\`))
1499 ((search-forward "." nil t) 1477 (search-forward "'" nil 'move
1500 ;; Fix . used as space 1478 (if (eq ?\' (following-char)) 2 1))
1501 ;; Example: danj1@cb.att.com (daniel.jacobson) 1479 (or (mail-extr-safe-move-sexp 1)
1502 (goto-char (point-min)) 1480 (goto-char (point-max))))
1503 (while (re-search-forward mail-extr-bad-dot-pattern nil t) 1481 (set-syntax-table mail-extr-address-text-syntax-table)
1504 (replace-match "\\1 \\2" t))))) 1482 (when (eq (char-after cbeg) ?\()
1505 1483 ;; Delete the comment itself.
1506 ;; Loop over the words (and other junk) in the name. 1484 (delete-region cbeg (point))
1507 (goto-char (point-min)) 1485 ;; Canonicalize whitespace where the comment was.
1508 (while (not name-done-flag) 1486 (skip-chars-backward " \t")
1509 1487 (if (looking-at "\\([ \t]+$\\|[ \t]+,\\)")
1510 (when word-found-flag 1488 (replace-match "")
1511 ;; Last time through this loop we skipped over a word. 1489 (setq cbeg (point))
1512 (setq last-word-beg this-word-beg) 1490 (skip-chars-forward " \t")
1513 (setq drop-last-word-if-trailing-flag 1491 (if (bobp)
1514 drop-this-word-if-trailing-flag) 1492 (delete-region (point) cbeg)
1515 (setq word-found-flag nil)) 1493 (just-one-space))))))
1516 1494
1517 (when begin-again-flag 1495 ;; This was moved above.
1518 ;; Last time through the loop we found something that 1496 ;; Fix . used as space
1519 ;; indicates we should pretend we are beginning again from 1497 ;; But it belongs here because it occurs not only as
1520 ;; the start. 1498 ;; rypens@reks.uia.ac.be (Piet.Rypens)
1521 (setq word-count 0) 1499 ;; but also as
1522 (setq last-word-beg nil) 1500 ;; "Piet.Rypens" <rypens@reks.uia.ac.be>
1523 (setq drop-last-word-if-trailing-flag nil) 1501 ;;(goto-char (point-min))
1524 (setq mixed-case-flag nil) 1502 ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t)
1525 (setq lower-case-flag nil) 1503 ;; (replace-match "\\1 \\2" t))
1526 ;; (setq upper-case-flag nil) 1504
1527 (setq begin-again-flag nil)) 1505 (unless (search-forward " " nil t)
1528 1506 (goto-char (point-min))
1529 ;; Initialize for this iteration of the loop. 1507 (cond ((search-forward "_" nil t)
1530 (mail-extr-skip-whitespace-forward) 1508 ;; Handle the *idiotic* use of underlines as spaces.
1531 (if (eq word-count 0) (narrow-to-region (point) (point-max))) 1509 ;; Example: fml@foo.bar.dom (First_M._Last)
1532 (setq this-word-beg (point)) 1510 (goto-char (point-min))
1533 (setq drop-this-word-if-trailing-flag nil) 1511 (while (search-forward "_" nil t)
1534 1512 (replace-match " " t)))
1535 ;; Decide what to do based on what we are looking at. 1513 ((search-forward "." nil t)
1536 (cond 1514 ;; Fix . used as space
1537 1515 ;; Example: danj1@cb.att.com (daniel.jacobson)
1538 ;; Delete title 1516 (goto-char (point-min))
1539 ((and (eq word-count 0) 1517 (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1540 (looking-at mail-extr-full-name-prefixes)) 1518 (replace-match "\\1 \\2" t)))))
1541 (goto-char (match-end 0)) 1519
1520 ;; Loop over the words (and other junk) in the name.
1521 (goto-char (point-min))
1522 (while (not name-done-flag)
1523
1524 (when word-found-flag
1525 ;; Last time through this loop we skipped over a word.
1526 (setq last-word-beg this-word-beg)
1527 (setq drop-last-word-if-trailing-flag
1528 drop-this-word-if-trailing-flag)
1529 (setq word-found-flag nil))
1530
1531 (when begin-again-flag
1532 ;; Last time through the loop we found something that
1533 ;; indicates we should pretend we are beginning again from
1534 ;; the start.
1535 (setq word-count 0)
1536 (setq last-word-beg nil)
1537 (setq drop-last-word-if-trailing-flag nil)
1538 (setq mixed-case-flag nil)
1539 (setq lower-case-flag nil)
1540 ;; (setq upper-case-flag nil)
1541 (setq begin-again-flag nil))
1542
1543 ;; Initialize for this iteration of the loop.
1544 (mail-extr-skip-whitespace-forward)
1545 (if (eq word-count 0) (narrow-to-region (point) (point-max)))
1546 (setq this-word-beg (point))
1547 (setq drop-this-word-if-trailing-flag nil)
1548
1549 ;; Decide what to do based on what we are looking at.
1550 (cond
1551
1552 ;; Delete title
1553 ((and (eq word-count 0)
1554 (looking-at mail-extr-full-name-prefixes))
1555 (goto-char (match-end 0))
1556 (narrow-to-region (point) (point-max)))
1557
1558 ;; Stop after name suffix
1559 ((and (>= word-count 2)
1560 (looking-at mail-extr-full-name-suffix-pattern))
1561 (mail-extr-skip-whitespace-backward)
1562 (setq suffix-flag (point))
1563 (if (eq ?, (following-char))
1564 (forward-char 1)
1565 (insert ?,))
1566 ;; Enforce at least one space after comma
1567 (or (eq ?\ (following-char))
1568 (insert ?\ ))
1569 (mail-extr-skip-whitespace-forward)
1570 (cond ((memq (following-char) '(?j ?J ?s ?S))
1571 (capitalize-word 1)
1572 (if (eq (following-char) ?.)
1573 (forward-char 1)
1574 (insert ?.)))
1575 (t
1576 (upcase-word 1)))
1577 (setq word-found-flag t)
1578 (setq name-done-flag t))
1579
1580 ;; Handle SCA names
1581 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
1582 (goto-char (match-beginning 1))
1583 (narrow-to-region (point) (point-max))
1584 (setq begin-again-flag t))
1585
1586 ;; Check for initial last name followed by comma
1587 ((and (eq ?, (following-char))
1588 (eq word-count 1))
1589 (forward-char 1)
1590 (setq last-name-comma-flag t)
1591 (or (eq ?\ (following-char))
1592 (insert ?\ )))
1593
1594 ;; Stop before trailing comma-separated comment
1595 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1596 ;; *** This case is redundant???
1597 ;;((eq ?, (following-char))
1598 ;; (setq name-done-flag t))
1599
1600 ;; Delete parenthesized/quoted comment/nickname
1601 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
1602 (setq cbeg (point))
1603 (set-syntax-table mail-extr-address-text-comment-syntax-table)
1604 (cond ((memq (following-char) '(?\' ?\`))
1605 (or (search-forward "'" nil t
1606 (if (eq ?\' (following-char)) 2 1))
1607 (delete-char 1)))
1608 (t
1609 (or (mail-extr-safe-move-sexp 1)
1610 (goto-char (point-max)))))
1611 (set-syntax-table mail-extr-address-text-syntax-table)
1612 (setq cend (point))
1613 (cond
1614 ;; Handle case of entire name being quoted
1615 ((and (eq word-count 0)
1616 (looking-at " *\\'")
1617 (>= (- cend cbeg) 2))
1618 (narrow-to-region (1+ cbeg) (1- cend))
1619 (goto-char (point-min)))
1620 (t
1621 ;; Handle case of quoted initial
1622 (if (and (or (= 3 (- cend cbeg))
1623 (and (= 4 (- cend cbeg))
1624 (eq ?. (char-after (+ 2 cbeg)))))
1625 (not (looking-at " *\\'")))
1626 (setq initial (char-after (1+ cbeg)))
1627 (setq initial nil))
1628 (delete-region cbeg cend)
1629 (if initial
1630 (insert initial ". ")))))
1631
1632 ;; Handle *Stupid* VMS date stamps
1633 ((looking-at mail-extr-stupid-vms-date-stamp-pattern)
1634 (replace-match "" t))
1635
1636 ;; Handle Chinese characters.
1637 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
1638 (goto-char (match-end 0))
1639 (setq word-found-flag t))
1640
1641 ;; Skip initial garbage characters.
1642 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1643 ((and (eq word-count 0)
1644 (looking-at mail-extr-leading-garbage))
1645 (goto-char (match-end 0))
1646 ;; *** Skip backward over these???
1647 ;; (skip-chars-backward "& \"")
1648 (narrow-to-region (point) (point-max)))
1649
1650 ;; Various stopping points
1651 ((or
1652
1653 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case
1654 ;; words. Example: XT-DEM.
1655 (and (>= word-count 2)
1656 mixed-case-flag
1657 (looking-at mail-extr-weird-acronym-pattern)
1658 (not (looking-at mail-extr-roman-numeral-pattern)))
1659
1660 ;; Stop before trailing alternative address
1661 (looking-at mail-extr-alternative-address-pattern)
1662
1663 ;; Stop before trailing comment not introduced by comma
1664 ;; THIS CASE MUST BE AFTER AN EARLIER CASE.
1665 (looking-at mail-extr-trailing-comment-start-pattern)
1666
1667 ;; Stop before telephone numbers
1668 (and (>= word-count 1)
1669 (looking-at mail-extr-telephone-extension-pattern)))
1670 (setq name-done-flag t))
1671
1672 ;; Delete ham radio call signs
1673 ((looking-at mail-extr-ham-call-sign-pattern)
1674 (delete-region (match-beginning 0) (match-end 0)))
1675
1676 ;; Fixup initials
1677 ((looking-at mail-extr-initial-pattern)
1678 (or (eq (following-char) (upcase (following-char)))
1679 (setq lower-case-flag t))
1680 (forward-char 1)
1681 (if (eq ?. (following-char))
1682 (forward-char 1)
1683 (insert ?.))
1684 (or (eq ?\ (following-char))
1685 (insert ?\ ))
1686 (setq word-found-flag t))
1687
1688 ;; Handle BITNET LISTSERV list names.
1689 ((and (eq word-count 0)
1690 (looking-at mail-extr-listserv-list-name-pattern))
1691 (narrow-to-region (match-beginning 1) (match-end 1))
1692 (setq word-found-flag t)
1693 (setq name-done-flag t))
1694
1695 ;; Handle & substitution, when & is last and is not first.
1696 ((and (> word-count 0)
1697 (eq ?\ (preceding-char))
1698 (eq (following-char) ?&)
1699 (eq (1+ (point)) (point-max)))
1700 (delete-char 1)
1701 (capitalize-region
1702 (point)
1703 (progn
1704 (insert-buffer-substring canonicalization-buffer
1705 mbox-beg mbox-end)
1706 (point)))
1707 (setq disable-initial-guessing-flag t)
1708 (setq word-found-flag t))
1709
1710 ;; Handle & between names, as in "Bob & Susie".
1711 ((and (> word-count 0) (eq (following-char) ?\&))
1712 (setq name-beg (point))
1713 (setq name-end (1+ name-beg))
1714 (setq word-found-flag t)
1715 (goto-char name-end))
1716
1717 ;; Regular name words
1718 ((looking-at mail-extr-name-pattern)
1719 (setq name-beg (point))
1720 (setq name-end (match-end 0))
1721
1722 ;; Certain words will be dropped if they are at the end.
1723 (and (>= word-count 2)
1724 (not lower-case-flag)
1725 (or
1726 ;; Trailing 4-or-more letter lowercase words preceded by
1727 ;; mixed case or uppercase words will be dropped.
1728 (looking-at "[[:lower:]]\\{4,\\}[ \t]*\\'")
1729 ;; Drop a trailing word which is terminated with a period.
1730 (eq ?. (char-after (1- name-end))))
1731 (setq drop-this-word-if-trailing-flag t))
1732
1733 ;; Set the flags that indicate whether we have seen a lowercase
1734 ;; word, a mixed case word, and an uppercase word.
1735 (if (re-search-forward "[[:lower:]]" name-end t)
1736 (if (progn
1737 (goto-char name-beg)
1738 (re-search-forward "[[:upper:]]" name-end t))
1739 (setq mixed-case-flag t)
1740 (setq lower-case-flag t))
1741 ;; (setq upper-case-flag t)
1742 )
1743
1744 (goto-char name-end)
1745 (setq word-found-flag t))
1746
1747 ;; Allow a number as a word, if it doesn't mean anything else.
1748 ((looking-at "[0-9]+\\>")
1749 (setq name-beg (point))
1750 (setq name-end (match-end 0))
1751 (goto-char name-end)
1752 (setq word-found-flag t))
1753
1754 (t
1755 (setq name-done-flag t)
1756 ))
1757
1758 ;; Count any word that we skipped over.
1759 (if word-found-flag
1760 (setq word-count (1+ word-count))))
1761
1762 ;; If the last thing in the name is 2 or more periods, or one or more
1763 ;; other sentence terminators (but not a single period) then keep them
1764 ;; and the preceding word. This is for the benefit of whole sentences
1765 ;; in the name field: it's better behavior than dropping the last word
1766 ;; of the sentence...
1767 (if (and (not suffix-flag)
1768 (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'"))
1769 (goto-char (setq suffix-flag (point-max))))
1770
1771 ;; Drop everything after point and certain trailing words.
1772 (narrow-to-region (point-min)
1773 (or (and drop-last-word-if-trailing-flag
1774 last-word-beg)
1775 (point)))
1776
1777 ;; Xerox's mailers SUCK!!!!!!
1778 ;; We simply refuse to believe that any last name is PARC or ADOC.
1779 ;; If it looks like that is the last name, that there is no meaningful
1780 ;; here at all. Actually I guess it would be best to map patterns
1781 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't
1782 ;; actually know that that is what's going on.
1783 (unless suffix-flag
1784 (goto-char (point-min))
1785 (let ((case-fold-search t))
1786 (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'")
1787 (erase-buffer))))
1788
1789 ;; If last name first put it at end (but before suffix)
1790 (when last-name-comma-flag
1791 (goto-char (point-min))
1792 (search-forward ",")
1793 (setq name-end (1- (point)))
1794 (goto-char (or suffix-flag (point-max)))
1795 (or (eq ?\ (preceding-char))
1796 (insert ?\ ))
1797 (insert-buffer-substring (current-buffer) (point-min) name-end)
1798 (goto-char name-end)
1799 (skip-chars-forward "\t ,")
1542 (narrow-to-region (point) (point-max))) 1800 (narrow-to-region (point) (point-max)))
1543 1801
1544 ;; Stop after name suffix 1802 ;; Delete leading and trailing junk characters.
1545 ((and (>= word-count 2) 1803 ;; *** This is probably completely unneeded now.
1546 (looking-at mail-extr-full-name-suffix-pattern)) 1804 ;;(goto-char (point-max))
1547 (mail-extr-skip-whitespace-backward) 1805 ;;(skip-chars-backward mail-extr-non-end-name-chars)
1548 (setq suffix-flag (point)) 1806 ;;(if (eq ?. (following-char))
1549 (if (eq ?, (following-char)) 1807 ;; (forward-char 1))
1550 (forward-char 1) 1808 ;;(narrow-to-region (point)
1551 (insert ?,)) 1809 ;; (progn
1552 ;; Enforce at least one space after comma 1810 ;; (goto-char (point-min))
1553 (or (eq ?\ (following-char)) 1811 ;; (skip-chars-forward mail-extr-non-begin-name-chars)
1554 (insert ?\ )) 1812 ;; (point)))
1555 (mail-extr-skip-whitespace-forward) 1813
1556 (cond ((memq (following-char) '(?j ?J ?s ?S)) 1814 ;; Compress whitespace
1557 (capitalize-word 1)
1558 (if (eq (following-char) ?.)
1559 (forward-char 1)
1560 (insert ?.)))
1561 (t
1562 (upcase-word 1)))
1563 (setq word-found-flag t)
1564 (setq name-done-flag t))
1565
1566 ;; Handle SCA names
1567 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
1568 (goto-char (match-beginning 1))
1569 (narrow-to-region (point) (point-max))
1570 (setq begin-again-flag t))
1571
1572 ;; Check for initial last name followed by comma
1573 ((and (eq ?, (following-char))
1574 (eq word-count 1))
1575 (forward-char 1)
1576 (setq last-name-comma-flag t)
1577 (or (eq ?\ (following-char))
1578 (insert ?\ )))
1579
1580 ;; Stop before trailing comma-separated comment
1581 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1582 ;; *** This case is redundant???
1583 ;;((eq ?, (following-char))
1584 ;; (setq name-done-flag t))
1585
1586 ;; Delete parenthesized/quoted comment/nickname
1587 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
1588 (setq cbeg (point))
1589 (set-syntax-table mail-extr-address-text-comment-syntax-table)
1590 (cond ((memq (following-char) '(?\' ?\`))
1591 (or (search-forward "'" nil t
1592 (if (eq ?\' (following-char)) 2 1))
1593 (delete-char 1)))
1594 (t
1595 (or (mail-extr-safe-move-sexp 1)
1596 (goto-char (point-max)))))
1597 (set-syntax-table mail-extr-address-text-syntax-table)
1598 (setq cend (point))
1599 (cond
1600 ;; Handle case of entire name being quoted
1601 ((and (eq word-count 0)
1602 (looking-at " *\\'")
1603 (>= (- cend cbeg) 2))
1604 (narrow-to-region (1+ cbeg) (1- cend))
1605 (goto-char (point-min)))
1606 (t
1607 ;; Handle case of quoted initial
1608 (if (and (or (= 3 (- cend cbeg))
1609 (and (= 4 (- cend cbeg))
1610 (eq ?. (char-after (+ 2 cbeg)))))
1611 (not (looking-at " *\\'")))
1612 (setq initial (char-after (1+ cbeg)))
1613 (setq initial nil))
1614 (delete-region cbeg cend)
1615 (if initial
1616 (insert initial ". ")))))
1617
1618 ;; Handle *Stupid* VMS date stamps
1619 ((looking-at mail-extr-stupid-vms-date-stamp-pattern)
1620 (replace-match "" t))
1621
1622 ;; Handle Chinese characters.
1623 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
1624 (goto-char (match-end 0))
1625 (setq word-found-flag t))
1626
1627 ;; Skip initial garbage characters.
1628 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1629 ((and (eq word-count 0)
1630 (looking-at mail-extr-leading-garbage))
1631 (goto-char (match-end 0))
1632 ;; *** Skip backward over these???
1633 ;; (skip-chars-backward "& \"")
1634 (narrow-to-region (point) (point-max)))
1635
1636 ;; Various stopping points
1637 ((or
1638
1639 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case
1640 ;; words. Example: XT-DEM.
1641 (and (>= word-count 2)
1642 mixed-case-flag
1643 (looking-at mail-extr-weird-acronym-pattern)
1644 (not (looking-at mail-extr-roman-numeral-pattern)))
1645
1646 ;; Stop before trailing alternative address
1647 (looking-at mail-extr-alternative-address-pattern)
1648
1649 ;; Stop before trailing comment not introduced by comma
1650 ;; THIS CASE MUST BE AFTER AN EARLIER CASE.
1651 (looking-at mail-extr-trailing-comment-start-pattern)
1652
1653 ;; Stop before telephone numbers
1654 (and (>= word-count 1)
1655 (looking-at mail-extr-telephone-extension-pattern)))
1656 (setq name-done-flag t))
1657
1658 ;; Delete ham radio call signs
1659 ((looking-at mail-extr-ham-call-sign-pattern)
1660 (delete-region (match-beginning 0) (match-end 0)))
1661
1662 ;; Fixup initials
1663 ((looking-at mail-extr-initial-pattern)
1664 (or (eq (following-char) (upcase (following-char)))
1665 (setq lower-case-flag t))
1666 (forward-char 1)
1667 (if (eq ?. (following-char))
1668 (forward-char 1)
1669 (insert ?.))
1670 (or (eq ?\ (following-char))
1671 (insert ?\ ))
1672 (setq word-found-flag t))
1673
1674 ;; Handle BITNET LISTSERV list names.
1675 ((and (eq word-count 0)
1676 (looking-at mail-extr-listserv-list-name-pattern))
1677 (narrow-to-region (match-beginning 1) (match-end 1))
1678 (setq word-found-flag t)
1679 (setq name-done-flag t))
1680
1681 ;; Handle & substitution, when & is last and is not first.
1682 ((and (> word-count 0)
1683 (eq ?\ (preceding-char))
1684 (eq (following-char) ?&)
1685 (eq (1+ (point)) (point-max)))
1686 (delete-char 1)
1687 (capitalize-region
1688 (point)
1689 (progn
1690 (insert-buffer-substring canonicalization-buffer
1691 mbox-beg mbox-end)
1692 (point)))
1693 (setq disable-initial-guessing-flag t)
1694 (setq word-found-flag t))
1695
1696 ;; Handle & between names, as in "Bob & Susie".
1697 ((and (> word-count 0) (eq (following-char) ?\&))
1698 (setq name-beg (point))
1699 (setq name-end (1+ name-beg))
1700 (setq word-found-flag t)
1701 (goto-char name-end))
1702
1703 ;; Regular name words
1704 ((looking-at mail-extr-name-pattern)
1705 (setq name-beg (point))
1706 (setq name-end (match-end 0))
1707
1708 ;; Certain words will be dropped if they are at the end.
1709 (and (>= word-count 2)
1710 (not lower-case-flag)
1711 (or
1712 ;; Trailing 4-or-more letter lowercase words preceded by
1713 ;; mixed case or uppercase words will be dropped.
1714 (looking-at "[[:lower:]]\\{4,\\}[ \t]*\\'")
1715 ;; Drop a trailing word which is terminated with a period.
1716 (eq ?. (char-after (1- name-end))))
1717 (setq drop-this-word-if-trailing-flag t))
1718
1719 ;; Set the flags that indicate whether we have seen a lowercase
1720 ;; word, a mixed case word, and an uppercase word.
1721 (if (re-search-forward "[[:lower:]]" name-end t)
1722 (if (progn
1723 (goto-char name-beg)
1724 (re-search-forward "[[:upper:]]" name-end t))
1725 (setq mixed-case-flag t)
1726 (setq lower-case-flag t))
1727 ;; (setq upper-case-flag t)
1728 )
1729
1730 (goto-char name-end)
1731 (setq word-found-flag t))
1732
1733 ;; Allow a number as a word, if it doesn't mean anything else.
1734 ((looking-at "[0-9]+\\>")
1735 (setq name-beg (point))
1736 (setq name-end (match-end 0))
1737 (goto-char name-end)
1738 (setq word-found-flag t))
1739
1740 (t
1741 (setq name-done-flag t)
1742 ))
1743
1744 ;; Count any word that we skipped over.
1745 (if word-found-flag
1746 (setq word-count (1+ word-count))))
1747
1748 ;; If the last thing in the name is 2 or more periods, or one or more
1749 ;; other sentence terminators (but not a single period) then keep them
1750 ;; and the preceding word. This is for the benefit of whole sentences
1751 ;; in the name field: it's better behavior than dropping the last word
1752 ;; of the sentence...
1753 (if (and (not suffix-flag)
1754 (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'"))
1755 (goto-char (setq suffix-flag (point-max))))
1756
1757 ;; Drop everything after point and certain trailing words.
1758 (narrow-to-region (point-min)
1759 (or (and drop-last-word-if-trailing-flag
1760 last-word-beg)
1761 (point)))
1762
1763 ;; Xerox's mailers SUCK!!!!!!
1764 ;; We simply refuse to believe that any last name is PARC or ADOC.
1765 ;; If it looks like that is the last name, that there is no meaningful
1766 ;; here at all. Actually I guess it would be best to map patterns
1767 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't
1768 ;; actually know that that is what's going on.
1769 (unless suffix-flag
1770 (goto-char (point-min)) 1815 (goto-char (point-min))
1771 (let ((case-fold-search t)) 1816 (while (re-search-forward "[ \t\n]+" nil t)
1772 (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'") 1817 (replace-match (if (eobp) "" " ") t))
1773 (erase-buffer)))) 1818 ))))
1774
1775 ;; If last name first put it at end (but before suffix)
1776 (when last-name-comma-flag
1777 (goto-char (point-min))
1778 (search-forward ",")
1779 (setq name-end (1- (point)))
1780 (goto-char (or suffix-flag (point-max)))
1781 (or (eq ?\ (preceding-char))
1782 (insert ?\ ))
1783 (insert-buffer-substring (current-buffer) (point-min) name-end)
1784 (goto-char name-end)
1785 (skip-chars-forward "\t ,")
1786 (narrow-to-region (point) (point-max)))
1787
1788 ;; Delete leading and trailing junk characters.
1789 ;; *** This is probably completely unneeded now.
1790 ;;(goto-char (point-max))
1791 ;;(skip-chars-backward mail-extr-non-end-name-chars)
1792 ;;(if (eq ?. (following-char))
1793 ;; (forward-char 1))
1794 ;;(narrow-to-region (point)
1795 ;; (progn
1796 ;; (goto-char (point-min))
1797 ;; (skip-chars-forward mail-extr-non-begin-name-chars)
1798 ;; (point)))
1799
1800 ;; Compress whitespace
1801 (goto-char (point-min))
1802 (while (re-search-forward "[ \t\n]+" nil t)
1803 (replace-match (if (eobp) "" " ") t))
1804 )))
1805 1819
1806 1820
1807 1821
1808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1822 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1809 ;; 1823 ;;