Mercurial > emacs

diff lisp/mail/mail-extr.el @ 809:8a0066235d56
Initial revision
author: Eric S. Raymond <esr@snark.thyrsus.com>
date: Fri, 17 Jul 1992 06:48:03 +0000
children: 20674ae6bf52
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lisp/mail/mail-extr.el	Fri Jul 17 06:48:03 1992 +0000
@@ -0,0 +1,1469 @@
+;;; mail-extr.el --- extract full name and address from RFC 822 mail header.
+
+;; Author: Joe Wells <jbw@cs.bu.edu>
+;; Last-Modified: 7 Apr 1992
+;; Version: 1.0
+;; Adapted-By: ESR
+;; Keywords: mail
+
+;; Copyright (C) 1992 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 1, or (at your option)
+;; any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+;;; Commentary:
+
+;; Here is `mail-extr', a package for extracting full names and canonical
+;; addresses from RFC 822 mail headers.  It is intended to be hooked into
+;; other Emacs Lisp packages that deal with RFC 822 format messages, such as
+;; Gnews, GNUS, RMAIL, MH-E, BBDB, VM, Supercite, etc.  Thus, this release is
+;; mainly for Emacs Lisp developers.
+
+;; There are two main benefits:
+
+;; 1. Higher probability of getting the correct full name for a human than
+;;    any other package I know of.  (On the other hand, it will cheerfully
+;;    mangle non-human names/comments.)
+;; 2. Address part is put in a canonical form.
+
+;; The interface is not yet carved in stone; please give me suggestions.
+
+;; I have an extensive test-case collection of funny addresses if you want to
+;; work with the code.  Developing this code requires frequent testing to
+;; make sure you're not breaking functionality.  I'm not posting the
+;; test-cases because they take over 100K.
+
+;; If you find an address that mail-extr fails on, please send it to me along
+;; with what you think the correct results should be.  I do not consider it a
+;; bug if mail-extr mangles a comment that does not correspond to a real
+;; human full name, although I would prefer that mail-extr would return the
+;; comment as-is.
+
+;; Features:
+
+;; * Full name handling:
+
+;;   * knows where full names can be found in an address.
+;;   * avoids using empty comments and quoted text.
+;;   * extracts full names from mailbox names.
+;;   * recognizes common formats for comments after a full name.
+;;   * puts a period and a space after each initial.
+;;   * understands & referring to the mailbox name capitalized.
+;;   * strips name prefixes like "Prof.", etc.
+;;   * understands what characters can occur in names (not just letters).
+;;   * figures out middle initial from mailbox name.
+;;   * removes funny nicknames.
+;;   * keeps suffixes such as Jr., Sr., III, etc.
+;;   * reorders "Last, First" type names.
+
+;; * Address handling:
+
+;;   * parses rfc822 quoted text, comments, and domain literals.
+;;   * parses rfc822 multi-line headers.
+;;   * does something reasonable with rfc822 GROUP addresses.
+;;   * handles many rfc822 noncompliant and garbage addresses.
+;;   * canonicalizes addresses (after stripping comments/phrases outside <>).
+;;     * converts ! addresses into .UUCP and %-style addresses.
+;;     * converts rfc822 ROUTE addresses to %-style addresses.
+;;     * truncates %-style addresses at leftmost fully qualified domain name.
+;;     * handles local relative precedence of ! vs. % and @ (untested).
+
+;; It does almost no string creation.  It primarily uses the built-in
+;; parsing routines with the appropriate syntax tables.  This should
+;; result in greater speed.
+
+;; TODO:
+
+;; * handle all test cases.  (This will take forever.)
+;; * software to pick the correct header to use (e.g., "Senders-Name:").
+;; * multiple addresses in the "From:" header (almost all of the necessary
+;;   code is there).
+;; * flag to not treat `,' as an address separator.  (This is useful when
+;;   there is a "From:" header but no "Sender:" header, because then there
+;;   is only allowed to be one address.)
+;; * mailbox name does not necessarily contain full name.
+;; * fixing capitalization when it's all upper or lowercase.  (Hard!)
+;; * some of the domain literal handling is missing.  (But I've never even
+;;   seen one of these in a mail address, so maybe no big deal.)
+;; * arrange to have syntax tables byte-compiled.
+;; * speed hacks.
+;; * delete unused variables.
+;; * arrange for testing with different relative precedences of ! vs. @
+;;   and %.
+;; * put variant-method back into mail-extract-address-components.
+;; * insert documentation strings!
+;; * handle X.400-gatewayed addresses according to RFC 1148.
+
+;;; Change Log: 
+;; 
+;; Mon Apr  6 23:59:09 1992  Joe Wells  (jbw at bigbird.bu.edu)
+;; 
+;; 	* Cleaned up some more.  Release version 1.0 to world.
+;; 
+;; Sun Apr  5 19:39:08 1992  Joe Wells  (jbw at bigbird.bu.edu)
+;; 
+;; 	* Cleaned up full name extraction extensively.
+;; 
+;; Sun Feb  2 14:45:24 1992  Joe Wells  (jbw at bigbird.bu.edu)
+;; 
+;; 	* Total rewrite.  Integrated mail-canonicalize-address into
+;; 	mail-extract-address-components.  Now handles GROUP addresses more
+;; 	or less correctly.  Better handling of lots of different cases.
+;; 
+;; Fri Jun 14 19:39:50 1991
+;;	* Created.
+
+;;; Code:
+
+;; Variable definitions.
+
+;; Consulted by `mail-extract-address-components' when it rewrites
+;; !-style paths.  When this is non-nil and the address also contains an
+;; `@', that `@' is first turned into a `%' and the rewritten !-hosts are
+;; inserted at the end of the address.  The file commentary lists this
+;; relative-precedence handling as untested.
+(defvar mail-@-binds-tighter-than-! nil)
+
+;;----------------------------------------------------------------------
+;; what orderings are meaningful?????
+;;(defvar mail-operator-precedence-list '(?! ?% ?@))
+;; Right operand of a % or a @ must be a domain name, period.  No other
+;; operators allowed.  Left operand of a @ is an address relative to that
+;; site.
+
+;; Left operand of a ! must be a domain name.  Right operand is an
+;; arbitrary address.
+;;----------------------------------------------------------------------
+
+;; Character code of the ASCII space.  `mail-nuke-char-at' inserts it in
+;; place of each character it blanks out.
+(defconst mail-space-char 32)
+
+;; Space, tab and newline, in the form taken by `skip-chars-forward' and
+;; `skip-chars-backward'.
+(defconst mail-whitespace " \t\n")
+
+;; Any character that can occur in a name in an RFC822 address.
+;; Yes, there are weird people with digits in their names.
+;; This and the sets below are bracket-expression contents; the patterns
+;; that follow splice them into `[%s]' with `format'.  `---' is the range
+;; from `-' to `-', i.e. a literal hyphen.
+(defconst mail-all-letters "A-Za-z---{|}'~0-9`.")
+
+;; Any character that can occur in a name, not counting characters that
+;; separate parts of a multipart name.
+(defconst mail-all-letters-but-separators "A-Za-z{|}'~0-9`")
+
+;; Any character that can start a name
+(defconst mail-first-letters "A-Za-z")
+
+;; Any character that can end a name.
+(defconst mail-last-letters "A-Za-z`'.")
+
+;; Matches an initial not followed by both a period and a space.
+;; Group 1 is the initial together with the character before it (if
+;; any); group 3 is whatever follows the initial.
+(defconst mail-bad-initials-pattern
+  (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)"
+	  mail-all-letters mail-first-letters mail-all-letters))
+
+;; Negated forms of `mail-all-letters', `mail-first-letters' and
+;; `mail-last-letters': the leading `^' complements the character set.
+(defconst mail-non-name-chars (concat "^" mail-all-letters "."))
+
+(defconst mail-non-begin-name-chars (concat "^" mail-first-letters))
+
+(defconst mail-non-end-name-chars (concat "^" mail-last-letters))
+
+;; Matches periods used instead of spaces.  Must not match the period
+;; following an initial.
+;; Group 1 is the two name characters before the run of periods and
+;; group 2 is the letter after it, so a replacement can rejoin them.
+(defconst mail-bad-\.-pattern
+  (format "\\([%s][%s]\\)\\.+\\([%s]\\)"
+	  mail-all-letters mail-last-letters mail-first-letters))
+
+;; Matches an embedded or leading nickname that should be removed.
+;; The nickname is enclosed in quote, bracket or parenthesis characters
+;; and must be followed by a space; group 2 is the text between the
+;; delimiters.
+(defconst mail-nickname-pattern
+  (format "\\([ .]\\|\\`\\)[\"'`\[\(]\\([ .%s]+\\)[\]\"'\)] "
+	  mail-all-letters))
+
+;; A title at the very start of the text (optionally preceded by spaces,
+;; optionally followed by a period, and always followed by one space).
+;; Titles are not part of the name: they do not help to identify the
+;; person uniquely.
+(defconst mail-full-name-prefixes
+  "\\` *\\(Prof\\|Dr\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.? ")
+
+;; Matches the occurrence of a generational name suffix, and the last
+;; character of the preceding name.
+(defconst mail-full-name-suffix-pattern
+  (format
+   "\\(,? ?\\([JjSs]r\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)"
+   mail-all-letters mail-all-letters))
+
+;; Matches a run of one or more `I's with an optional `V' on either
+;; side (e.g. II, III, IV, VI), ending at a word boundary.
+(defconst mail-roman-numeral-pattern
+  "V?I+V?\\b")
+
+;; Matches a trailing uppercase (with other characters possible) acronym.
+;; Must not match a trailing uppercase last name or trailing initial
+(defconst mail-weird-acronym-pattern "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)")
+      
+;; Matches a mixed-case or lowercase name (not an initial).
+(defconst mail-mixed-case-name-pattern
+  (format
+   "\\b\\([a-z][%s]*[%s]\\|[%s][%s]*[a-z][%s]*[%s]\\|[%s][%s]*[a-z]\\)"
+   mail-all-letters mail-last-letters
+   mail-first-letters mail-all-letters mail-all-letters mail-last-letters
+   mail-first-letters mail-all-letters))
+
+;; Matches a trailing alternative address.
+(defconst mail-alternative-address-pattern "[a-zA-Z.]+[!@][a-zA-Z.]")
+
+;; Matches a variety of trailing comments not including comma-delimited
+;; comments.
+(defconst mail-trailing-comment-start-pattern " [-{]\\|--\\|[+@#></\;]")
+
+;; Matches a name (not an initial).
+;; This doesn't force a word boundary at the end because sometimes a
+;; comment is separated by a `-' with no preceding space.
+(defconst mail-name-pattern
+  (format
+   "\\b[%s][%s]*[%s]"
+   mail-first-letters mail-all-letters mail-last-letters))
+
+;; Matches a single initial: one name-starting letter at a word boundary,
+;; followed by a period, a space, or another word boundary.
+(defconst mail-initial-pattern
+  (format "\\b[%s]\\([. ]\\|\\b\\)" mail-first-letters))
+
+;; Matches a single name before a comma.
+(defconst mail-last-name-first-pattern
+  (concat "\\`" mail-name-pattern ","))
+
+;; Matches telephone extensions.
+(defconst mail-telephone-extension-pattern
+  "\\(\\([Ee]xt\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+")
+
+;; Matches ham radio call signs.
+(defconst mail-ham-call-sign-pattern
+  "\\b[A-Z]+[0-9][A-Z0-9]*")
+
+;; Matches normal single-part name
+(defconst mail-normal-name-pattern
+  (format
+   "\\b[%s][%s]+[%s]"
+   mail-first-letters mail-all-letters-but-separators mail-last-letters))
+
+;; Matches normal two names with missing middle initial
+;; The whole text must be a first name or initial, spaces, and a last
+;; name, followed by a comma or the end of the text.
+(defconst mail-two-name-pattern
+  (concat "\\`\\(" mail-normal-name-pattern
+	  "\\|" mail-initial-pattern
+	  "\\) +\\(" mail-normal-name-pattern "\\)\\(,\\|\\'\\)"))
+
+;; The five syntax tables used while parsing.  Each starts out as a fresh
+;; table and is filled in by the `mapcar' form below.
+(defvar address-syntax-table (make-syntax-table))
+(defvar address-comment-syntax-table (make-syntax-table))
+(defvar address-domain-literal-syntax-table (make-syntax-table))
+(defvar address-text-comment-syntax-table (make-syntax-table))
+(defvar address-text-syntax-table (make-syntax-table))
+;; Fill in the tables from the quoted specification at the end of this
+;; form.  Each element of the specification is (TABLE-SYMBOL ITEM...).
+;; Items are applied in order, so a later item overrides an earlier one
+;; that covers the same character.
+(mapcar
+ (function
+  (lambda (pair)
+    (let ((syntax-table (symbol-value (car pair))))
+      (mapcar
+       (function
+	(lambda (item)
+	  ;; (CHAR SYNTAX) sets one character; (FIRST LAST SYNTAX) sets
+	  ;; every character from FIRST through LAST inclusive.
+	  (if (eq 2 (length item))
+	      (modify-syntax-entry (car item) (car (cdr item)) syntax-table)
+	    (let ((char (car item))
+		  (bound (car (cdr item)))
+		  (syntax (car (cdr (cdr item)))))
+	      (while (<= char bound)
+		(modify-syntax-entry char syntax syntax-table)
+		(setq char (1+ char)))))))
+       (cdr pair)))))
+ ;; Everything is a word constituent except whitespace, the punctuation
+ ;; characters listed individually, `\' (escape) and `"' (string quote).
+ '((address-syntax-table
+    (0  31   "w")			;control characters
+    (32      " ")			;SPC
+    (?! ?~   "w")			;printable characters
+    (127     "w")			;DEL
+    (128 255 "w")			;high-bit-on characters
+    (?\t " ")
+    (?\r " ")
+    (?\n " ")
+    (?\( ".")
+    (?\) ".")
+    (?<  ".")
+    (?>  ".")
+    (?@  ".")
+    (?,  ".")
+    (?\; ".")
+    (?:  ".")
+    (?\\ "\\")
+    (?\" "\"")
+    (?.  ".")
+    (?\[ ".")
+    (?\] ".")
+    ;; % and ! aren't RFC822 characters, but it is convenient to pretend
+    (?%  ".")
+    (?!  ".")
+    )
+   ;; Only ( ) pair up and `\' escapes; every other character is a word
+   ;; constituent.
+   (address-comment-syntax-table
+    (0 255 "w")
+    (?\( "\(\)")
+    (?\) "\)\(")
+    (?\\ "\\"))
+   ;; Same idea, with [ ] as the only paired delimiters.
+   (address-domain-literal-syntax-table
+    (0 255 "w")
+    (?\[ "\(\]")			;??????
+    (?\] "\)\[")			;??????
+    (?\\ "\\"))
+   ;; ( ), [ ] and { } all pair up, `\' escapes and `"' quotes strings.
+   (address-text-comment-syntax-table
+    (0 255 "w")
+    (?\( "\(\)")
+    (?\) "\)\(")
+    (?\[ "\(\]")
+    (?\] "\)\[")
+    (?\{ "\(\}")
+    (?\} "\)\{")
+    (?\\ "\\")
+    (?\" "\"")
+    ;; (?\' "\)\`")
+    ;; (?\` "\(\'")
+    )
+   ;; Everything is punctuation except letters, digits and - { } | ' ~,
+   ;; which are word constituents.
+   (address-text-syntax-table
+    (0 255 ".")
+    (?A ?Z "w")
+    (?a ?z "w")
+    (?-    "w")
+    (?\}   "w")
+    (?\{   "w")
+    (?|    "w")
+    (?\'   "w")
+    (?~    "w")
+    (?0 ?9 "w"))
+   ))
+
+
+;; Utility functions and macros.
+
+(defmacro undo-backslash-quoting (beg end)
+  "Remove one level of backslash quoting between BEG and END.
+Every backslash followed by a character other than newline is replaced
+by that character alone.  The expansion runs inside `save-excursion'
+and `save-restriction', narrowed to the region.  BEG and END are each
+evaluated once."
+  (list 'save-excursion
+        (list 'save-restriction
+              (list 'narrow-to-region beg end)
+              '(goto-char (point-min))
+              ;; The search resumes after each replacement, so the
+              ;; character just unquoted is never examined again.
+              '(while (re-search-forward "\\\\\\(.\\)" nil t)
+                 (replace-match "\\1")))))
+
+(defmacro mail-nuke-char-at (pos)
+  "Replace the character at buffer position POS with `mail-space-char'.
+The character is deleted and a space inserted in its place, so the
+buffer keeps its length.  The expansion is wrapped in `save-excursion'.
+POS is evaluated once."
+  (list 'save-excursion
+        (list 'goto-char pos)
+        '(delete-char 1)
+        '(insert mail-space-char)))
+
+(defmacro mail-nuke-elements-outside-range (list-symbol beg-symbol end-symbol
+							&optional no-replace)
+  "Drop from the list in LIST-SYMBOL every position outside BEG-SYMBOL..END-SYMBOL.
+An element is dropped when it is greater than END-SYMBOL or less than
+BEG-SYMBOL; the trimmed list is stored back into LIST-SYMBOL.
+
+NO-REPLACE is examined when the macro is expanded, not when the code
+runs.  If it is nil, the buffer character at each dropped position is
+also blanked with `mail-nuke-char-at'; otherwise its value is spliced
+into the expansion in place of that call.
+
+The expansion uses the variable `temp' as its list cursor without
+binding it, and evaluates BEG-SYMBOL and END-SYMBOL once per element."
+  (list 'progn
+        (list 'setq 'temp list-symbol)
+        (list 'while 'temp
+              (list 'cond
+                    (list (list 'or
+                                (list '> '(car temp) end-symbol)
+                                (list '< '(car temp) beg-symbol))
+                          (or no-replace
+                              '(mail-nuke-char-at (car temp)))
+                          ;; Mark the cell; the marks are removed below.
+                          '(setcar temp nil)))
+              '(setq temp (cdr temp)))
+        (list 'setq list-symbol (list 'delq nil list-symbol))))
+
+(defun mail-demarkerize (marker)
+  "Return the position of MARKER and detach it from its buffer.
+If MARKER is nil, return nil.  If MARKER is not a marker, return it
+unchanged."
+  (cond ((null marker) nil)
+        ((markerp marker)
+         ;; Read the position first; the marker has none once detached.
+         (prog1 (marker-position marker)
+           (set-marker marker nil)))
+        (t marker)))
+
+(defun mail-markerize (pos)
+  "Return a marker for POS.
+If POS is nil, return nil.  If POS is already a marker, return that
+same marker; otherwise return a new marker made with `copy-marker'."
+  (cond ((null pos) nil)
+        ((markerp pos) pos)
+        (t (copy-marker pos))))
+
+(defmacro mail-last-element (list)
+  "Return last element of LIST.
+LIST is evaluated once.  The value is nil when LIST is empty."
+  (list 'let (list (list 'tail list))
+        ;; Walk down to the final cons cell.
+        '(while (cdr tail)
+           (setq tail (cdr tail)))
+        '(car tail)))
+  
+(defmacro safe-move-sexp (arg)
+  "Safely skip over one balanced sexp, if there is one.  Return t if success.
+ARG is the count passed to `scan-sexps': positive moves forward,
+negative moves backward.
+
+If there is no sexp left to skip in that direction, point stays where
+it is and the value is still t.  If the scan stops on unbalanced
+parentheses, point stays where it is and the value is nil.  Any other
+error is signaled again unchanged."
+  (list 'condition-case 'error
+        (list 'progn
+              ;; `scan-sexps' returns nil, rather than signaling, when it
+              ;; reaches the end of the buffer between groupings; fall
+              ;; back to point so that `goto-char' never receives nil.
+              (list 'goto-char
+                    (list 'or (list 'scan-sexps '(point) arg) '(point)))
+              t)
+        ;; The second element of the error value need not be a string,
+        ;; so check its type before comparing it with the message.
+        '(error
+          (if (and (stringp (nth 1 error))
+                   (string-equal (nth 1 error) "Unbalanced parentheses"))
+              nil
+            (while t
+              (signal (car error) (cdr error)))))))
+
+
+;; The main function to grind addresses
+
+(defun mail-extract-address-components (address)
+  "Given an rfc 822 ADDRESS, extract full name and canonical address.
+Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)."
+  (let ((canonicalization-buffer (get-buffer-create "*canonical address*"))
+	(extraction-buffer (get-buffer-create "*extract address components*"))
+	(foo 'bar)
+	char
+	multiple-addresses
+	<-pos >-pos @-pos :-pos ,-pos !-pos %-pos \;-pos
+	group-:-pos group-\;-pos route-addr-:-pos
+	record-pos-symbol
+	first-real-pos last-real-pos
+	phrase-beg phrase-end
+	comment-beg comment-end
+	quote-beg quote-end
+	atom-beg atom-end
+	mbox-beg mbox-end
+	\.-ends-name
+	temp
+	name-suffix
+	saved-point
+	fi mi li
+	saved-%-pos saved-!-pos saved-@-pos
+	domain-pos \.-pos insert-point)
+    
+    (save-excursion
+      (set-buffer extraction-buffer)
+      (buffer-flush-undo extraction-buffer)
+      (set-syntax-table address-syntax-table)
+      (widen)
+      (erase-buffer)
+      (setq case-fold-search nil)
+      
+      ;; Insert extra space at beginning to allow later replacement with <
+      ;; without having to move markers.
+      (insert mail-space-char address)
+      
+      ;; stolen from rfc822.el
+      ;; Unfold multiple lines.
+      (goto-char (point-min))
+      (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
+	(replace-match "\\1 " t))
+      
+      ;; first pass grabs useful information about address
+      (goto-char (point-min))
+      (while (progn
+	       (skip-chars-forward mail-whitespace)
+	       (not (eobp)))
+	(setq char (char-after (point)))
+	(or first-real-pos
+	    (if (not (eq char ?\())
+		(setq first-real-pos (point))))
+	(cond
+	 ;; comment
+	 ((eq char ?\()
+	  (set-syntax-table address-comment-syntax-table)
+	  ;; only record the first non-empty comment's position
+	  (if (and (not comment-beg)
+		   (save-excursion
+		     (forward-char 1)
+		     (skip-chars-forward mail-whitespace)
+		     (not (eq ?\) (char-after (point))))))
+	      (setq comment-beg (point)))
+	  ;; TODO: don't record if unbalanced
+	  (or (safe-move-sexp 1)
+	      (forward-char 1))
+	  (set-syntax-table address-syntax-table)
+	  (if (and comment-beg
+		   (not comment-end))
+	      (setq comment-end (point))))
+	 ;; quoted text
+	 ((eq char ?\")
+	  ;; only record the first non-empty quote's position
+	  (if (and (not quote-beg)
+		   (save-excursion
+		     (forward-char 1)
+		     (skip-chars-forward mail-whitespace)
+		     (not (eq ?\" (char-after (point))))))
+	      (setq quote-beg (point)))
+	  ;; TODO: don't record if unbalanced
+	  (or (safe-move-sexp 1)
+	      (forward-char 1))
+	  (if (and quote-beg
+		   (not quote-end))
+	      (setq quote-end (point))))
+	 ;; domain literals
+	 ((eq char ?\[)
+	  (set-syntax-table address-domain-literal-syntax-table)
+	  (or (safe-move-sexp 1)
+	      (forward-char 1))
+	  (set-syntax-table address-syntax-table))
+	 ;; commas delimit addresses when outside < > pairs.
+	 ((and (eq char ?,)
+	       (or (null <-pos)
+		   (and >-pos
+			;; handle weird munged addresses
+			(> (mail-last-element <-pos) (car >-pos)))))
+	  (setq multiple-addresses t)
+	  (delete-char 1)
+	  (narrow-to-region (point-min) (point)))
+	 ;; record the position of various interesting chars, determine
+	 ;; legality later.
+	 ((setq record-pos-symbol
+		(cdr (assq char
+			   '((?< . <-pos) (?> . >-pos) (?@ . @-pos)
+			     (?: . :-pos) (?, . ,-pos) (?! . !-pos)
+			     (?% . %-pos) (?\; . \;-pos)))))
+	  (set record-pos-symbol
+	       (cons (point) (symbol-value record-pos-symbol)))
+	  (forward-char 1))
+	 ((eq char ?.)
+	  (forward-char 1))
+	 ((memq char '(
+		       ;; comment terminator illegal
+		       ?\)
+		       ;; domain literal terminator illegal
+		       ?\]
+		       ;; \ allowed only within quoted strings,
+		       ;; domain literals, and comments
+		       ?\\
+		       ))
+	  (mail-nuke-char-at (point))
+	  (forward-char 1))
+	 (t
+	  (forward-word 1)))
+	(or (eq char ?\()
+	    (setq last-real-pos (point))))
+      
+      ;; Use only the leftmost <, if any.  Replace all others with spaces.
+      (while (cdr <-pos)
+	(mail-nuke-char-at (car <-pos))
+	(setq <-pos (cdr <-pos)))
+      
+      ;; Use only the rightmost >, if any.  Replace all others with spaces.
+      (while (cdr >-pos)
+	(mail-nuke-char-at (nth 1 >-pos))
+	(setcdr >-pos (nthcdr 2 >-pos)))
+      
+      ;; If multiple @s and a :, but no < and >, insert around buffer.
+      ;; This commonly happens on the UUCP "From " line.  Ugh.
+      (cond ((and (> (length @-pos) 1)
+		  :-pos			;TODO: check if between @s
+		  (not <-pos))
+	     (goto-char (point-min))
+	     (delete-char 1)
+	     (setq <-pos (list (point)))
+	     (insert ?<)))
+      
+      ;; If < but no >, insert > in rightmost possible position
+      (cond ((and <-pos
+		  (null >-pos))
+	     (goto-char (point-max))
+	     (setq >-pos (list (point)))
+	     (insert ?>)))
+      
+      ;; If > but no <, replace > with space.
+      (cond ((and >-pos
+		  (null <-pos))
+	     (mail-nuke-char-at (car >-pos))
+	     (setq >-pos nil)))
+
+      ;; Turn >-pos and <-pos into non-lists
+      (setq >-pos (car >-pos)
+	    <-pos (car <-pos))
+      
+      ;; Trim other punctuation lists of items outside < > pair to handle
+      ;; stupid MTAs.
+      (cond (<-pos			; don't need to check >-pos also
+	     ;; handle bozo software that violates RFC 822 by sticking
+	     ;; punctuation marks outside of a < > pair
+	     (mail-nuke-elements-outside-range @-pos <-pos >-pos t)
+	     ;; RFC 822 says nothing about these two outside < >, but
+	     ;; remove those positions from the lists to make things
+	     ;; easier.
+	     (mail-nuke-elements-outside-range !-pos <-pos >-pos t)
+	     (mail-nuke-elements-outside-range %-pos <-pos >-pos t)))
+      
+      ;; Check for : that indicates GROUP list and for : part of
+      ;; ROUTE-ADDR spec.
+      ;; Can't possibly be more than two :.  Nuke any extra.
+      (while :-pos
+	(setq temp (car :-pos)
+	      :-pos (cdr :-pos))
+	(cond ((and <-pos >-pos
+		    (> temp <-pos)
+		    (< temp >-pos))
+	       (if (or route-addr-:-pos
+		       (< (length @-pos) 2)
+		       (> temp (car @-pos))
+		       (< temp (nth 1 @-pos)))
+		   (mail-nuke-char-at temp)
+		 (setq route-addr-:-pos temp)))
+	      ((or (not <-pos)
+		   (and <-pos
+			(< temp <-pos)))
+	       (setq group-:-pos temp))))
+      
+      ;; Nuke any ; that is in or to the left of a < > pair or to the left
+      ;; of a GROUP starting :.  Also, there may only be one ;.
+      (while \;-pos
+	(setq temp (car \;-pos)
+	      \;-pos (cdr \;-pos))
+	(cond ((and <-pos >-pos
+		    (> temp <-pos)
+		    (< temp >-pos))
+	       (mail-nuke-char-at temp))
+	      ((and (or (not group-:-pos)
+			(> temp group-:-pos))
+		    (not group-\;-pos))
+	       (setq group-\;-pos temp))))
+      
+      ;; Handle junk like ";@host.company.dom" that sendmail adds.
+      ;; **** should I remember comment positions?
+      (and group-\;-pos
+	   ;; this is fine for now
+	   (mail-nuke-elements-outside-range !-pos group-:-pos group-\;-pos t)
+	   (mail-nuke-elements-outside-range @-pos group-:-pos group-\;-pos t)
+	   (mail-nuke-elements-outside-range %-pos group-:-pos group-\;-pos t)
+	   (mail-nuke-elements-outside-range ,-pos group-:-pos group-\;-pos t)
+	   (and last-real-pos
+		(> last-real-pos (1+ group-\;-pos))
+		(setq last-real-pos (1+ group-\;-pos)))
+	   (and comment-end
+		(> comment-end group-\;-pos)
+		(setq comment-end nil
+		      comment-beg nil))
+	   (and quote-end
+		(> quote-end group-\;-pos)
+		(setq quote-end nil
+		      quote-beg nil))
+	   (narrow-to-region (point-min) group-\;-pos))
+      
+      ;; Any commas must be between < and : of ROUTE-ADDR.  Nuke any
+      ;; others.
+      ;; Hell, go ahead and nuke all of the commas.
+      ;; **** This will cause problems when we start handling commas in
+      ;; the PHRASE part .... no it won't ... yes it will ... ?????
+      (mail-nuke-elements-outside-range ,-pos 1 1)
+      
+      ;; can only have multiple @s inside < >.  The fact that some MTAs
+      ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
+      ;; handled above.
+      
+      ;; Locate PHRASE part of ROUTE-ADDR.
+      (cond (<-pos
+	     (goto-char <-pos)
+	     (skip-chars-backward mail-whitespace)
+	     (setq phrase-end (point))
+	     (goto-char (or ;;group-:-pos
+			    (point-min)))
+	     (skip-chars-forward mail-whitespace)
+	     (if (< (point) phrase-end)
+		 (setq phrase-beg (point))
+	       (setq phrase-end nil))))
+      
+      ;; handle ROUTE-ADDRS with real ROUTEs.
+      ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and
+      ;; any % or ! must be semantically meaningless.
+      ;; TODO: do this processing into canonicalization buffer
+      (cond (route-addr-:-pos
+	     (setq !-pos nil
+		   %-pos nil
+		   >-pos (copy-marker >-pos)
+		   route-addr-:-pos (copy-marker route-addr-:-pos))
+	     (goto-char >-pos)
+	     (insert-before-markers ?X)
+	     (goto-char (car @-pos))
+	     (while (setq @-pos (cdr @-pos))
+	       (delete-char 1)
+	       (setq %-pos (cons (point-marker) %-pos))
+	       (insert "%")
+	       (goto-char (1- >-pos))
+	       (save-excursion
+		 (insert-buffer-substring extraction-buffer
+					  (car @-pos) route-addr-:-pos)
+		 (delete-region (car @-pos) route-addr-:-pos))
+	       (or (cdr @-pos)
+		   (setq saved-@-pos (list (point)))))
+	     (setq @-pos saved-@-pos)
+	     (goto-char >-pos)
+	     (delete-char -1)
+	     (mail-nuke-char-at route-addr-:-pos)
+	     (mail-demarkerize route-addr-:-pos)
+	     (setq route-addr-:-pos nil
+		   >-pos (mail-demarkerize >-pos)
+		   %-pos (mapcar 'mail-demarkerize %-pos))))
+      
+      ;; de-listify @-pos
+      (setq @-pos (car @-pos))
+      
+      ;; TODO: remove comments in the middle of an address
+      
+      (set-buffer canonicalization-buffer)
+      
+      (buffer-flush-undo canonicalization-buffer)
+      (set-syntax-table address-syntax-table)
+      (setq case-fold-search nil)
+      
+      (widen)
+      (erase-buffer)
+      (insert-buffer-substring extraction-buffer)
+      
+      (if <-pos
+	  (narrow-to-region (progn
+			      (goto-char (1+ <-pos))
+			      (skip-chars-forward mail-whitespace)
+			      (point))
+			    >-pos)
+	;; ****** Oh no!  What if the address is completely empty!
+	(narrow-to-region first-real-pos last-real-pos))
+      
+      (and @-pos %-pos
+	   (mail-nuke-elements-outside-range %-pos (point-min) @-pos))
+      (and %-pos !-pos
+	   (mail-nuke-elements-outside-range !-pos (point-min) (car %-pos)))
+      (and @-pos !-pos (not %-pos)
+	   (mail-nuke-elements-outside-range !-pos (point-min) @-pos))
+      
+      ;; Error condition:?? (and %-pos (not @-pos))
+
+      (cond (!-pos
+	     ;; **** I don't understand this save-restriction and the
+	     ;; narrow-to-region inside it.  Why did I do that?
+	     (save-restriction
+	       (cond ((and @-pos
+			   mail-@-binds-tighter-than-!)
+		      (goto-char @-pos)
+		      (setq %-pos (cons (point) %-pos)
+			    @-pos nil)
+		      (delete-char 1)
+		      (insert "%")
+		      (setq insert-point (point-max)))
+		     (mail-@-binds-tighter-than-!
+		      (setq insert-point (point-max)))
+		     (%-pos
+		      (setq insert-point (mail-last-element %-pos)
+			    saved-%-pos (mapcar 'mail-markerize %-pos)
+			    %-pos nil
+			    @-pos (mail-markerize @-pos)))
+		     (@-pos
+		      (setq insert-point @-pos)
+		      (setq @-pos (mail-markerize @-pos)))
+		     (t
+		      (setq insert-point (point-max))))
+	       (narrow-to-region (point-min) insert-point)
+	       (setq saved-!-pos (car !-pos))
+	       (while !-pos
+		 (goto-char (point-max))
+		 (cond ((and (not @-pos)
+			     (not (cdr !-pos)))
+			(setq @-pos (point))
+			(insert-before-markers "@ "))
+		       (t
+			(setq %-pos (cons (point) %-pos))
+			(insert-before-markers "% ")))
+		 (backward-char 1)
+		 (insert-buffer-substring 
+		  (current-buffer)
+		  (if (nth 1 !-pos)
+		      (1+ (nth 1 !-pos))
+		    (point-min))
+		  (car !-pos))
+		 (delete-char 1)
+		 (or (save-excursion
+		       (safe-move-sexp -1)
+		       (skip-chars-backward mail-whitespace)
+		       (eq ?. (preceding-char)))
+		     (insert-before-markers
+		      (if (save-excursion
+			    (skip-chars-backward mail-whitespace)
+			    (eq ?. (preceding-char)))
+			  ""
+			".")
+		      "uucp"))
+		 (setq !-pos (cdr !-pos))))
+	     (and saved-%-pos
+		  (setq %-pos (append (mapcar 'mail-demarkerize saved-%-pos)
+					%-pos)))
+	     (setq @-pos (mail-demarkerize @-pos))
+	     (narrow-to-region (1+ saved-!-pos) (point-max))))
+      (cond ((and %-pos
+		  (not @-pos))
+	     (goto-char (car %-pos))
+	     (delete-char 1)
+	     (setq @-pos (point))
+	     (insert "@")
+	     (setq %-pos (cdr %-pos))))
+      (setq %-pos (nreverse %-pos))
+      ;; RFC 1034 doesn't approve of this, oh well:
+      (downcase-region (or (car %-pos) @-pos (point-max)) (point-max))
+      (cond (%-pos			; implies @-pos valid
+	     (setq temp %-pos)
+	     (catch 'truncated
+	       (while temp
+		 (goto-char (or (nth 1 temp)
+				@-pos))
+		 (skip-chars-backward mail-whitespace)
+		 (save-excursion
+		   (safe-move-sexp -1)
+		   (setq domain-pos (point))
+		   (skip-chars-backward mail-whitespace)
+		   (setq \.-pos (eq ?. (preceding-char))))
+		 (cond ((and \.-pos
+			     (get
+			      (intern
+			       (buffer-substring domain-pos (point)))
+			      'domain-name))
+			(narrow-to-region (point-min) (point))
+			(goto-char (car temp))
+			(delete-char 1)
+			(setq @-pos (point))
+			(setcdr temp nil)
+			(setq %-pos (delq @-pos %-pos))
+			(insert "@")
+			(throw 'truncated t)))
+		 (setq temp (cdr temp))))))
+      (setq mbox-beg (point-min)
+	    mbox-end (if %-pos (car %-pos)
+		       (or @-pos
+			   (point-max))))
+      
+      ;; Done canonicalizing address.
+      
+      (set-buffer extraction-buffer)
+      
+      ;; Find the full name
+      
+      (cond ((and phrase-beg
+		  (eq quote-beg phrase-beg)
+		  (<= quote-end phrase-end))
+	     (narrow-to-region (1+ quote-beg) (1- quote-end))
+	     (undo-backslash-quoting (point-min) (point-max)))
+	    (phrase-beg
+	     (narrow-to-region phrase-beg phrase-end))
+	    (comment-beg
+	     (narrow-to-region (1+ comment-beg) (1- comment-end))
+	     (undo-backslash-quoting (point-min) (point-max)))
+	    (t
+	     ;; *** Work in canon buffer instead?  No, can't.  Hmm.
+	     (delete-region (point-min) (point-max))
+	     (insert-buffer-substring canonicalization-buffer
+				      mbox-beg mbox-end)
+	     (goto-char (point-min))
+	     (setq \.-ends-name (search-forward "_" nil t))
+	     (goto-char (point-min))
+	     (while (progn
+		      (skip-chars-forward mail-whitespace)
+		      (not (eobp)))
+	       (setq char (char-after (point)))
+	       (cond
+		((eq char ?\")
+		 (setq quote-beg (point))
+		 (or (safe-move-sexp 1)
+		     ;; TODO: handle this error condition!!!!!
+		     (forward-char 1))
+		 ;; take into account deletions
+		 (setq quote-end (- (point) 2))
+		 (save-excursion
+		   (backward-char 1)
+		   (delete-char 1)
+		   (goto-char quote-beg)
+		   (delete-char 1))
+		 (undo-backslash-quoting quote-beg quote-end)
+		 (or (eq mail-space-char (char-after (point)))
+		     (insert " "))
+		 (setq \.-ends-name t))
+		((eq char ?.)
+		 (if (eq (char-after (1+ (point))) ?_)
+		     (progn
+		       (forward-char 1)
+		       (delete-char 1)
+		       (insert mail-space-char))
+		   (if \.-ends-name
+		       (narrow-to-region (point-min) (point))
+		     (delete-char 1)
+		     (insert " "))))
+		((memq (char-syntax char) '(?. ?\\))
+		 (delete-char 1)
+		 (insert " "))
+		(t
+		 (setq atom-beg (point))
+		 (forward-word 1)
+		 (setq atom-end (point))
+		 (save-restriction
+		   (narrow-to-region atom-beg atom-end)
+		   (goto-char (point-min))
+		   (while (re-search-forward "\\([^_]+\\)_" nil t)
+		     (replace-match "\\1 "))
+		   (goto-char (point-max))))))))
+      
+      (set-syntax-table address-text-syntax-table)
+      
+      (setq xxx (variant-method (buffer-string)))
+      (delete-region (point-min) (point-max))
+      (insert xxx)
+      (goto-char (point-min))
+
+;;       ;; Compress whitespace
+;;       (goto-char (point-min))
+;;       (while (re-search-forward "[ \t\n]+" nil t)
+;; 	(replace-match " "))
+;;       
+;;       ;; Fix . used as space
+;;       (goto-char (point-min))
+;;       (while (re-search-forward mail-bad-\.-pattern nil t)
+;; 	(replace-match "\\1 \\2"))
+;; 
+;;       ;; Delete trailing parenthesized comment
+;;       (goto-char (point-max))
+;;       (skip-chars-backward mail-whitespace)
+;;       (cond ((memq (char-after (1- (point))) '(?\) ?\} ?\]))
+;; 	     (setq comment-end (point))
+;; 	     (set-syntax-table address-text-comment-syntax-table)
+;; 	     (or (safe-move-sexp -1)
+;; 		 (backward-char 1))
+;; 	     (set-syntax-table address-text-syntax-table)
+;; 	     (setq comment-beg (point))
+;; 	     (skip-chars-backward mail-whitespace)
+;; 	     (if (bobp)
+;; 		 (narrow-to-region (1+ comment-beg) (1- comment-end))
+;; 	       (narrow-to-region (point-min) (point)))))
+;;       
+;;       ;; Find, save, and delete any name suffix
+;;       ;; *** Broken!
+;;       (goto-char (point-min))
+;;       (cond ((re-search-forward mail-full-name-suffix-pattern nil t)
+;; 	     (setq name-suffix (buffer-substring (match-beginning 3)
+;; 						 (match-end 3)))
+;; 	     (replace-match "\\1 \\4")))
+;;       
+;;       ;; Delete ALL CAPS words and after, if preceded by mixed-case or
+;;       ;; lowercase words.  Eg. XT-DEM.
+;;       (goto-char (point-min))
+;;       ;; ## This will lose on something like "SMITH MAX".
+;;       ;; ## maybe it should be
+;;       ;; ##  " \\([A-Z]+[-_/][A-Z]+\\|[A-Z][A-Z][A-Z]\\)\\b.*[^A-Z \t]"
+;;       ;; ## that is, three-letter-upper-case-word with non-upper-case
+;;       ;; ## characters following it.
+;;       (if (re-search-forward mail-mixed-case-name-pattern nil t)
+;; 	  (if (re-search-forward mail-weird-acronym-pattern nil t)
+;; 	      (narrow-to-region (point-min) (match-beginning 0))))
+;;       
+;;       ;; Delete trailing alternative address
+;;       (goto-char (point-min))
+;;       (if (re-search-forward mail-alternative-address-pattern nil t)
+;; 	  (narrow-to-region (point-min) (match-beginning 0)))
+;;       
+;;       ;; Delete trailing comment
+;;       (goto-char (point-min))
+;;       (if (re-search-forward mail-trailing-comment-start-pattern nil t)
+;; 	  (or (progn
+;; 		(goto-char (match-beginning 0))
+;; 		(skip-chars-backward mail-whitespace)
+;; 		(bobp))
+;; 	      (narrow-to-region (point-min) (match-beginning 0))))
+;;       
+;;       ;; Delete trailing comma-separated comment
+;;       (goto-char (point-min))
+;;       ;; ## doesn't this break "Smith, John"?  Yes.
+;;       (re-search-forward mail-last-name-first-pattern nil t)
+;;       (while (search-forward "," nil t)
+;; 	(or (save-excursion
+;; 	      (backward-char 2)
+;; 	      (looking-at mail-full-name-suffix-pattern))
+;; 	    (narrow-to-region (point-min) (1- (point)))))
+;;       
+;;       ;; Delete telephone numbers and ham radio call signs
+;;       (goto-char (point-min))
+;;       (if (re-search-forward mail-telephone-extension-pattern nil t)
+;; 	  (narrow-to-region (point-min) (match-beginning 0)))
+;;       (goto-char (point-min))
+;;       (if (re-search-forward mail-ham-call-sign-pattern nil t)
+;; 	  (if (eq (match-beginning 0) (point-min))
+;; 	      (narrow-to-region (match-end 0) (point-max))
+;; 	    (narrow-to-region (point-min) (match-beginning 0))))
+;;       
+;;       ;; Delete trailing word followed immediately by .
+;;       (goto-char (point-min))
+;;       ;; ## what's this for?  doesn't it mess up "Public, Harry Q."?  No.
+;;       (if (re-search-forward "\\b[A-Za-z][A-Za-z]+\\. *\\'" nil t)
+;; 	  (narrow-to-region (point-min) (match-beginning 0)))
+;;       
+;;       ;; Handle & substitution
+;;       ;; TODO: remember to disable middle initial guessing
+;;       (goto-char (point-min))
+;;       (cond ((re-search-forward "\\( \\|\\`\\)&\\( \\|\\'\\)" nil t)
+;; 	     (goto-char (match-end 1))
+;; 	     (delete-char 1)
+;; 	     (capitalize-region
+;; 	      (point)
+;; 	      (progn
+;; 		(insert-buffer-substring canonicalization-buffer
+;; 					 mbox-beg mbox-end)
+;; 		(point)))))
+;;       
+;;       ;; Delete nickname
+;;       (goto-char (point-min))
+;;       (if (re-search-forward mail-nickname-pattern nil t)
+;; 	  (replace-match (if (eq (match-beginning 2) (1- (match-end 2)))
+;; 			     " \\2 "
+;; 			   " ")))
+;;       
+;;       ;; Fixup initials
+;;       (while (progn
+;; 	       (goto-char (point-min))
+;; 	       (re-search-forward mail-bad-initials-pattern nil t))
+;; 	(replace-match
+;; 	 (if (match-beginning 4)
+;; 	     "\\1. \\4"
+;; 	   (if (match-beginning 5)
+;; 	       "\\1. \\5"
+;; 	     "\\1. "))))
+;;       
+;;       ;; Delete title
+;;       (goto-char (point-min))
+;;       (if (re-search-forward mail-full-name-prefixes nil t)
+;; 	  (narrow-to-region (point) (point-max)))
+;;       
+;;       ;; Delete trailing and preceding non-name characters
+;;       (goto-char (point-min))
+;;       (skip-chars-forward mail-non-begin-name-chars)
+;;       (narrow-to-region (point) (point-max))
+;;       (goto-char (point-max))
+;;       (skip-chars-backward mail-non-end-name-chars)
+;;       (narrow-to-region (point-min) (point))
+      
+      ;; If name is "First Last" and userid is "F?L", then assume
+      ;; the middle initial is the second letter in the userid.
+      ;; initially by Jamie Zawinski <jwz@lucid.com>
+      (cond ((and (eq 3 (- mbox-end mbox-beg))
+		  (progn
+		    (goto-char (point-min))
+		    (looking-at mail-two-name-pattern)))
+	     (setq fi (char-after (match-beginning 0))
+		   li (char-after (match-beginning 3)))
+	     (save-excursion
+	       (set-buffer canonicalization-buffer)
+	       ;; char-equal is ignoring case here, so no need to upcase
+	       ;; or downcase.
+	       (let ((case-fold-search t))
+		 (and (char-equal fi (char-after mbox-beg))
+		      (char-equal li (char-after (1- mbox-end)))
+		      (setq mi (char-after (1+ mbox-beg))))))
+	     (cond ((and mi
+			 ;; TODO: use better table than syntax table
+			 (eq ?w (char-syntax mi)))
+		    (goto-char (match-beginning 3))
+		    (insert (upcase mi) ". ")))))
+      
+;;       ;; Restore suffix
+;;       (cond (name-suffix
+;; 	     (goto-char (point-max))
+;; 	     (insert ", " name-suffix)
+;; 	     (backward-word 1)
+;; 	     (cond ((memq (following-char) '(?j ?J ?s ?S))
+;; 		    (capitalize-word 1)
+;; 		    (or (eq (following-char) ?.)
+;; 			(insert ?.)))
+;; 		   (t
+;; 		    (upcase-word 1)))))
+      
+      ;; Result
+      (list (buffer-string)
+	    (progn
+	      (set-buffer canonicalization-buffer)
+	      (buffer-string)))
+      )))
+
+;; TODO: put this back in the above function now that it's proven:
+(defun variant-method (string)
+  "Return a cleaned-up full name extracted from STRING.
+STRING is copied into a scratch buffer and scanned word by word: titles,
+quoted or parenthesized comments and ham radio call signs are removed,
+initials get a trailing period, and scanning stops at the first thing
+that does not look like part of a name.  A leading \"Last,\" is moved to
+the end of the name (before any suffix) and runs of whitespace are
+compressed to a single space.
+
+This function reads the variables `canonicalization-buffer', `mbox-beg'
+and `mbox-end' without binding them, so they must be dynamically bound by
+the caller; they supply the mailbox text substituted for a lone `&'."
+  (let ((variant-buffer (get-buffer-create "*variant method buffer*"))
+	(word-count 0)
+	mixed-case-flag lower-case-flag upper-case-flag
+	suffix-flag last-name-comma-flag
+	comment-beg comment-end initial beg end
+	)
+    (save-excursion
+      (set-buffer variant-buffer)
+      (buffer-flush-undo variant-buffer)
+      (set-syntax-table address-text-syntax-table)
+      (widen)
+      (erase-buffer)
+      ;; The case tests below ([a-z] / [A-Z]) must be case-sensitive.
+      (setq case-fold-search nil)
+      
+      (insert string)
+      
+      ;; Fix . used as space
+      (goto-char (point-min))
+      (while (re-search-forward mail-bad-\.-pattern nil t)
+	(replace-match "\\1 \\2"))
+
+      ;; Skip any initial garbage.
+      (goto-char (point-min))
+      (skip-chars-forward mail-non-begin-name-chars)
+      (skip-chars-backward "& \"")
+      (narrow-to-region (point) (point-max))
+      
+      ;; Main scan.  Each iteration consumes one token at point; the loop
+      ;; is left by throwing to `stop' with point at the end of the name.
+      (catch 'stop
+	(while t
+	  (skip-chars-forward mail-whitespace)
+	  
+	  (cond
+	   
+	   ;; Delete title
+	   ((and (eq word-count 0)
+		 (looking-at mail-full-name-prefixes))
+	    (goto-char (match-end 0))
+	    (narrow-to-region (point) (point-max)))
+	   
+	   ;; Stop after name suffix
+	   ((and (>= word-count 2)
+		 (looking-at mail-full-name-suffix-pattern))
+	    (skip-chars-backward mail-whitespace)
+	    ;; Remember where the suffix starts so that a leading last name
+	    ;; can later be inserted in front of it.
+	    (setq suffix-flag (point))
+	    (if (eq ?, (following-char))
+		(forward-char 1)
+	      (insert ?,))
+	    ;; Enforce at least one space after comma
+	    (or (eq mail-space-char (following-char))
+		(insert mail-space-char))
+	    (skip-chars-forward mail-whitespace)
+	    ;; Suffixes starting with j/s are capitalized and given a
+	    ;; period; anything else is upcased.
+	    (cond ((memq (following-char) '(?j ?J ?s ?S))
+		   (capitalize-word 1)
+		   (if (eq (following-char) ?.)
+		       (forward-char 1)
+		     (insert ?.)))
+		  (t
+		   (upcase-word 1)))
+	    (setq word-count (1+ word-count))
+	    (throw 'stop t))
+	   
+	   ;; Handle SCA names
+	   ((looking-at "MKA \\(.+\\)")	; "Mundanely Known As"
+	    (setq word-count 0)
+	    (goto-char (match-beginning 1))
+	    (narrow-to-region (point) (point-max)))
+	   
+	   ;; Various stopping points
+	   ((or
+	     ;; Stop before ALL CAPS acronyms, if preceded by mixed-case or
+	     ;; lowercase words.  Eg. XT-DEM.
+	     (and (>= word-count 2)
+		  (or mixed-case-flag lower-case-flag)
+		  (looking-at mail-weird-acronym-pattern)
+		  (not (looking-at mail-roman-numeral-pattern)))
+	     ;; Stop before 4-or-more letter lowercase words preceded by
+	     ;; mixed case or uppercase words.
+	     (and (>= word-count 2)
+		  (or upper-case-flag mixed-case-flag)
+		  (looking-at "[a-z][a-z][a-z][a-z]+\\b"))
+	     ;; Stop before trailing alternative address
+	     (looking-at mail-alternative-address-pattern)
+	     ;; Stop before trailing comment not introduced by comma
+	     (looking-at mail-trailing-comment-start-pattern)
+	     ;; Stop before telephone numbers
+	     (looking-at mail-telephone-extension-pattern))
+	    (throw 'stop t))
+	   
+	   ;; Check for initial last name followed by comma
+	   ((and (eq ?, (following-char))
+		 (eq word-count 1))
+	    (forward-char 1)
+	    (setq last-name-comma-flag t)
+	    (or (eq mail-space-char (following-char))
+		(insert mail-space-char)))
+	   
+	   ;; Stop before trailing comma-separated comment
+	   ((eq ?, (following-char))
+	    (throw 'stop t))
+	   
+	   ;; Delete parenthesized/quoted comment/nickname
+	   ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
+	    (setq comment-beg (point))
+	    (set-syntax-table address-text-comment-syntax-table)
+	    (cond ((memq (following-char) '(?\' ?\`))
+		   ;; Look for the closing apostrophe.  When the opener is
+		   ;; itself an apostrophe it is the first match, so ask
+		   ;; for the second one.  A failed search leaves point on
+		   ;; the opener, so `delete-char' removes the stray quote
+		   ;; itself rather than the character following it.
+		   (or (search-forward "'" nil t
+				       (if (eq ?\' (following-char)) 2 1))
+		       (delete-char 1)))
+		  (t
+		   (or (safe-move-sexp 1)
+		       (goto-char (point-max)))))
+	    (set-syntax-table address-text-syntax-table)
+	    (setq comment-end (point))
+	    (cond
+	     ;; Handle case of entire name being quoted
+	     ((and (eq word-count 0)
+		   (looking-at " *\\'")
+		   (>= (- comment-end comment-beg) 2))
+	      (narrow-to-region (1+ comment-beg) (1- comment-end))
+	      (goto-char (point-min)))
+	     (t
+	      ;; Handle case of quoted initial
+	      (if (and (or (= 3 (- comment-end comment-beg))
+			   (and (= 4 (- comment-end comment-beg))
+				(eq ?. (char-after (+ 2 comment-beg)))))
+		       (not (looking-at " *\\'")))
+		  (setq initial (char-after (1+ comment-beg)))
+		(setq initial nil))
+	      (delete-region comment-beg comment-end)
+	      (if initial
+		  (insert initial ". ")))))
+	   
+	   ;; Delete ham radio call signs
+	   ((looking-at mail-ham-call-sign-pattern)
+	    (delete-region (match-beginning 0) (match-end 0)))
+	   
+	   ;; Handle & substitution
+	   ;; TODO: remember to disable middle initial guessing
+	   ((and (or (bobp)
+		     (eq mail-space-char (preceding-char)))
+		 (looking-at "&\\( \\|\\'\\)"))
+	    (delete-char 1)
+	    ;; Replace the & by the capitalized mailbox text taken from the
+	    ;; caller's canonicalization buffer.
+	    (capitalize-region
+	     (point)
+	     (progn
+	       (insert-buffer-substring canonicalization-buffer
+					mbox-beg mbox-end)
+	       (point))))
+	   
+	   ;; Fixup initials
+	   ((looking-at mail-initial-pattern)
+	    (or (eq (following-char) (upcase (following-char)))
+		(setq lower-case-flag t))
+	    (forward-char 1)
+	    (if (eq ?. (following-char))
+		(forward-char 1)
+	      (insert ?.))
+	    (or (eq mail-space-char (following-char))
+		(insert mail-space-char))
+	    (setq word-count (1+ word-count)))
+	   
+	   ;; Regular name words
+	   ((looking-at mail-name-pattern)
+	    (setq beg (point))
+	    (setq end (match-end 0))
+	    ;; Record the case class of this word by setting exactly one of
+	    ;; the three flag variables, selected by symbol.
+	    (set (if (re-search-forward "[a-z]" end t)
+		     (if (progn
+			   (goto-char beg)
+			   (re-search-forward "[A-Z]" end t))
+			 'mixed-case-flag
+		       'lower-case-flag)
+		   'upper-case-flag) t)
+	    (goto-char end)
+	    (setq word-count (1+ word-count)))
+
+	   (t
+	    (throw 'stop t)))))
+      
+      ;; Everything after point is not part of the name.
+      (narrow-to-region (point-min) (point))
+
+      ;; Delete trailing word followed immediately by .
+      (cond ((not suffix-flag)
+	     (goto-char (point-min))
+	     (if (re-search-forward "\\b[A-Za-z][A-Za-z]+\\. *\\'" nil t)
+		 (narrow-to-region (point-min) (match-beginning 0)))))
+      
+      ;; If last name first put it at end (but before suffix)
+      (cond (last-name-comma-flag
+	     (goto-char (point-min))
+	     (search-forward ",")
+	     (setq end (1- (point)))
+	     (goto-char (or suffix-flag (point-max)))
+	     (or (eq mail-space-char (preceding-char))
+		 (insert mail-space-char))
+	     (insert-buffer-substring (current-buffer) (point-min) end)
+	     (narrow-to-region (1+ end) (point-max))))
+      
+      ;; Trim non-name characters from both ends, keeping a final period.
+      (goto-char (point-max))
+      (skip-chars-backward mail-non-end-name-chars)
+      (if (eq ?. (following-char))
+	  (forward-char 1))
+      (narrow-to-region (point)
+			(progn
+			  (goto-char (point-min))
+			  (skip-chars-forward mail-non-begin-name-chars)
+			  (point)))
+      
+      ;; Compress whitespace
+      (goto-char (point-min))
+      (while (re-search-forward "[ \t\n]+" nil t)
+	(replace-match " "))
+
+      (buffer-substring (point-min) (point-max))
+
+      )))
+
+;; The country names are just in there for show right now, and because
+;; Jamie thought it would be neat.  They aren't used yet.
+
+;; Keep in mind that the country abbreviations follow ISO-3166.  There is
+;; a U.S. FIPS that specifies a different set of two-letter country
+;; abbreviations.
+
+;; TODO: put this in its own obarray, instead of cluttering up the main
+;; symbol table with junk.
+
+;; Record every known top-level domain on its symbol's `domain-name'
+;; property.  A bare symbol in the table gets the value t; a
+;; (SYMBOL "Country") entry gets the country name string.
+(mapcar
+ (function
+  (lambda (x)
+    (if (symbolp x)
+	(put x 'domain-name t)
+      (put (car x) 'domain-name (nth 1 x)))))
+ '((ag "Antigua")
+   (ar "Argentina")			; Argentine Republic
+   arpa					; Advanced Research Projects Agency
+   (at "Austria")			; The Republic of _
+   (au "Australia")
+   (bb "Barbados")
+   (be "Belgium")			; The Kingdom of _
+   (bg "Bulgaria")
+   bitnet				; Because It's Time NET
+   (bo "Bolivia")			; Republic of _
+   (br "Brazil")			; The Federative Republic of _
+   (bs "Bahamas")
+   (bz "Belize")
+   (ca "Canada")
+   (ch "Switzerland")			; The Swiss Confederation
+   (cl "Chile")				; The Republic of _
+   (cn "China")				; The People's Republic of _
+   (co "Colombia")
+   com					; Commercial
+   (cr "Costa Rica")			; The Republic of _
+   (cs "Czechoslovakia")
+   (de "Germany")
+   (dk "Denmark")
+   (dm "Dominica")
+   (do "Dominican Republic")		; The _
+   (ec "Ecuador")			; The Republic of _
+   edu					; Educational
+   (eg "Egypt")				; The Arab Republic of _
+   (es "Spain")				; The Kingdom of _
+   (fi "Finland")			; The Republic of _
+   (fj "Fiji")
+   (fr "France")
+   gov					; Government (U.S.A.)
+   (gr "Greece")			; The Hellenic Republic
+   (hk "Hong Kong")
+   (hu "Hungary")			; The Hungarian People's Republic (???)
+   (ie "Ireland")
+   (il "Israel")			; The State of _
+   (in "India")				; The Republic of _
+   int					; International organizations
+   (is "Iceland")			; The Republic of _
+   (it "Italy")				; The Italian Republic
+   (jm "Jamaica")
+   (jp "Japan")
+   (kn "St. Kitts and Nevis")
+   (kr "South Korea")
+   (lc "St. Lucia")
+   (lk "Sri Lanka")		       ; The Democratic Socialist Republic of _
+   mil					; Military (U.S.A.)
+   (mx "Mexico")			; The United Mexican States
+   (my "Malaysia")			; (Myanmar is `mm', not `my')
+   (na "Namibia")
+   nato					; North Atlantic Treaty Organization
+   net					; Network
+   (ni "Nicaragua")			; The Republic of _
+   (nl "Netherlands")			; The Kingdom of the _
+   (no "Norway")			; The Kingdom of _
+   (nz "New Zealand")
+   org					; Organization
+   (pe "Peru")
+   (pg "Papua New Guinea")
+   (ph "Philippines")			; The Republic of the _
+   (pl "Poland")
+   (pr "Puerto Rico")
+   (pt "Portugal")			; The Portuguese Republic
+   (py "Paraguay")
+   (se "Sweden")			; The Kingdom of _
+   (sg "Singapore")			; The Republic of _
+   (sr "Suriname")
+   (su "Soviet Union")
+   (th "Thailand")			; The Kingdom of _
+   (tn "Tunisia")
+   (tr "Turkey")			; The Republic of _
+   (tt "Trinidad and Tobago")
+   (tw "Taiwan")
+   (uk "United Kingdom")		; The _ of Great Britain
+   unter-dom				; something German
+   (us "U.S.A.")			; The United States of America
+   uucp					; Unix to Unix CoPy
+   (uy "Uruguay")			; The Eastern Republic of _
+   (vc "St. Vincent and the Grenadines")
+   (ve "Venezuela")			; The Republic of _
+   (yu "Yugoslavia")			; The Socialist Federal Republic of _
+   ;; Also said to be Zambia ...
+   (za "South Africa")			; The Republic of _ (why not Zaire???)
+   (zw "Zimbabwe")			; Republic of _
+   ))
+;; fipnet
+
+
+;; Code for testing.
+
+(defun time-extract ()
+  "Run the extractor once over every address in `problem-address-alist'.
+Return a list (START END) holding the values of `current-time-string'
+taken just before and just after the run."
+  (let ((started (current-time-string))
+	(remaining problem-address-alist))
+    ;; Only the car (the raw address) of each entry is fed to the
+    ;; extractor; the result is discarded.
+    (while remaining
+      (mail-extract-address-components (car (car remaining)))
+      (setq remaining (cdr remaining)))
+    (list started (current-time-string))))
+
+(defun test-extract (&optional starting-point)
+  "Check every entry of `problem-address-alist' with `test-extract-internal'.
+The buffer \"*Testing*\" is made current and emptied first.  If
+STARTING-POINT is non-nil it must be an element (compared with `eq') of
+`problem-address-alist'; checking then begins at that element."
+  (interactive)
+  (let ((report (get-buffer-create "*Testing*")))
+    (set-buffer report)
+    (erase-buffer))
+  (sit-for 0)
+  (let ((pending (if starting-point
+		     (memq starting-point problem-address-alist)
+		   problem-address-alist)))
+    (mapcar (function test-extract-internal) pending)))
+
+(defvar failed-item)
+(defun test-extract-internal (item)
+  "Check the extractor against ITEM and report any mismatch.
+ITEM is a list (ADDRESS CORRECT-NAME CORRECT-CANON).  A nil expected
+value is not checked; names are compared ignoring case, canonical
+addresses exactly.  Mismatches are appended to the buffer \"*Testing*\".
+`failed-item' holds ITEM while the check runs and is reset to nil
+afterwards, so it stays set if the check is interrupted by an error."
+  (setq failed-item item)
+  (let* ((address (car item))
+	 (want-name (nth 1 item))
+	 (want-canon (nth 2 item))
+	 (outcome (mail-extract-address-components address))
+	 (got-name (car outcome))
+	 (got-canon (nth 1 outcome))
+	 (name-ok (if want-name
+		      (string-equal (downcase want-name)
+				    (downcase got-name))
+		    t))
+	 (canon-ok (if want-canon
+		       (string-equal want-canon got-canon)
+		     t)))
+    (or (and name-ok canon-ok)
+	(progn
+	  (pop-to-buffer "*Testing*")
+	  (select-window (get-buffer-window (current-buffer)))
+	  (goto-char (point-max))
+	  (insert "Address: " address "\n")
+	  (or name-ok
+	      (insert " Correct Name:  [" want-name
+		      "]\; Result: [" got-name "]\n"))
+	  (or canon-ok
+	      (insert " Correct Canon: [" want-canon
+		      "]\; Result: [" got-canon "]\n"))
+	  (insert "\n")
+	  (sit-for 0))))
+  (setq failed-item nil))
+
+(defun test-continue-extract ()
+  "Resume `test-extract' at the entry recorded in `failed-item'.
+If `failed-item' is nil or has never been given a value, the whole of
+`problem-address-alist' is checked again."
+  (interactive)
+  ;; `failed-item' is declared without an initial value, so it is void
+  ;; until the first test has run; treat that the same as nil.
+  (test-extract (if (boundp 'failed-item) failed-item)))
+
+
+;; Assorted junk.
+
+;;	warsaw@nlm.nih.gov (A Bad Dude -- Barry Warsaw)
+
+;;'(from
+;;  reply-to
+;;  return-path
+;;  x-uucp-from
+;;  sender
+;;  resent-from
+;;  resent-sender
+;;  resent-reply-to)
+
+;;; mail-extr.el ends here
author	Eric S. Raymond <esr@snark.thyrsus.com>
date	Fri, 17 Jul 1992 06:48:03 +0000
parents
children	20674ae6bf52