Mercurial > emacs
diff lisp/mail/mail-extr.el @ 809:8a0066235d56
Initial revision
author | Eric S. Raymond <esr@snark.thyrsus.com> |
---|---|
date | Fri, 17 Jul 1992 06:48:03 +0000 |
parents | |
children | 20674ae6bf52 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/mail/mail-extr.el Fri Jul 17 06:48:03 1992 +0000 @@ -0,0 +1,1469 @@ +;;; mail-extr.el --- extract full name and address from RFC 822 mail header. + +;; Author: Joe Wells <jbw@cs.bu.edu> +;; Last-Modified: 7 Apr 1992 +;; Version: 1.0 +;; Adapted-By: ESR +;; Keywords: mail + +;; Copyright (C) 1992 Free Software Foundation, Inc. + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 1, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to +;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +;;; Commentary: + +;; Here is `mail-extr', a package for extracting full names and canonical +;; addresses from RFC 822 mail headers. It is intended to be hooked into +;; other Emacs Lisp packages that deal with RFC 822 format messages, such as +;; Gnews, GNUS, RMAIL, MH-E, BBDB, VM, Supercite, etc. Thus, this release is +;; mainly for Emacs Lisp developers. + +;; There are two main benefits: + +;; 1. Higher probability of getting the correct full name for a human than +;; any other package I know of. (On the other hand, it will cheerfully +;; mangle non-human names/comments.) +;; 2. Address part is put in a canonical form. + +;; The interface is not yet carved in stone; please give me suggestions. + +;; I have an extensive test-case collection of funny addresses if you want to +;; work with the code. Developing this code requires frequent testing to +;; make sure you're not breaking functionality. I'm not posting the +;; test-cases because they take over 100K. + +;; If you find an address that mail-extr fails on, please send it to me along +;; with what you think the correct results should be. I do not consider it a +;; bug if mail-extr mangles a comment that does not correspond to a real +;; human full name, although I would prefer that mail-extr would return the +;; comment as-is. + +;; Features: + +;; * Full name handling: + +;; * knows where full names can be found in an address. +;; * avoids using empty comments and quoted text. +;; * extracts full names from mailbox names. +;; * recognizes common formats for comments after a full name. +;; * puts a period and a space after each initial. +;; * understands & referring to the mailbox name capitalized. +;; * strips name prefixes like "Prof.", etc.. +;; * understands what characters can occur in names (not just letters). +;; * figures out middle initial from mailbox name. +;; * removes funny nicknames. +;; * keeps suffixes such as Jr., Sr., III, etc. +;; * reorders "Last, First" type names. + +;; * Address handling: + +;; * parses rfc822 quoted text, comments, and domain literals. +;; * parses rfc822 multi-line headers. +;; * does something reasonable with rfc822 GROUP addresses. +;; * handles many rfc822 noncompliant and garbage addresses. +;; * canonicalizes addresses (after stripping comments/phrases outside <>). +;; * converts ! addresses into .UUCP and %-style addresses. +;; * converts rfc822 ROUTE addresses to %-style addresses. +;; * truncates %-style addresses at leftmost fully qualified domain name. +;; * handles local relative precedence of ! vs. % and @ (untested). + +;; It does almost no string creation. It primarily uses the built-in +;; parsing routines with the appropriate syntax tables. This should +;; result in greater speed. + +;; TODO: + +;; * handle all test cases. (This will take forever.) +;; * software to pick the correct header to use (eg., "Senders-Name:"). +;; * multiple addresses in the "From:" header (almost all of the necessary +;; code is there). +;; * flag to not treat `,' as an address separator. (This is useful when +;; there is a "From:" header but no "Sender:" header, because then there +;; is only allowed to be one address.) +;; * mailbox name does not necessarily contain full name. +;; * fixing capitalization when it's all upper or lowercase. (Hard!) +;; * some of the domain literal handling is missing. (But I've never even +;; seen one of these in a mail address, so maybe no big deal.) +;; * arrange to have syntax tables byte-compiled. +;; * speed hacks. +;; * delete unused variables. +;; * arrange for testing with different relative precedences of ! vs. @ +;; and %. +;; * put variant-method back into mail-extract-address-components. +;; * insert documentation strings! +;; * handle X.400-gatewayed addresses according to RFC 1148. + +;;; Change Log: +;; +;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu) +;; +;; * Cleaned up some more. Release version 1.0 to world. +;; +;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu) +;; +;; * Cleaned up full name extraction extensively. +;; +;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu) +;; +;; * Total rewrite. Integrated mail-canonicalize-address into +;; mail-extract-address-components. Now handles GROUP addresses more +;; or less correctly. Better handling of lots of different cases. +;; +;; Fri Jun 14 19:39:50 1991 +;; * Created. + +;;; Code: + +;; Variable definitions. + +(defvar mail-@-binds-tighter-than-! nil) + +;;---------------------------------------------------------------------- +;; what orderings are meaningful????? +;;(defvar mail-operator-precedence-list '(?! ?% ?@)) +;; Right operand of a % or a @ must be a domain name, period. No other +;; operators allowed. Left operand of a @ is an address relative to that +;; site. + +;; Left operand of a ! must be a domain name. Right operand is an +;; arbitrary address. +;;---------------------------------------------------------------------- + +(defconst mail-space-char 32) + +(defconst mail-whitespace " \t\n") + +;; Any character that can occur in a name in an RFC822 address. +;; Yes, there are weird people with digits in their names. +(defconst mail-all-letters "A-Za-z---{|}'~0-9`.") + +;; Any character that can occur in a name, not counting characters that +;; separate parts of a multipart name. +(defconst mail-all-letters-but-separators "A-Za-z{|}'~0-9`") + +;; Any character that can start a name +(defconst mail-first-letters "A-Za-z") + +;; Any character that can end a name. +(defconst mail-last-letters "A-Za-z`'.") + +;; Matches an initial not followed by both a period and a space. +(defconst mail-bad-initials-pattern + (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)" + mail-all-letters mail-first-letters mail-all-letters)) + +(defconst mail-non-name-chars (concat "^" mail-all-letters ".")) + +(defconst mail-non-begin-name-chars (concat "^" mail-first-letters)) + +(defconst mail-non-end-name-chars (concat "^" mail-last-letters)) + +;; Matches periods used instead of spaces. Must not match the period +;; following an initial. +(defconst mail-bad-\.-pattern + (format "\\([%s][%s]\\)\\.+\\([%s]\\)" + mail-all-letters mail-last-letters mail-first-letters)) + +;; Matches an embedded or leading nickname that should be removed. +(defconst mail-nickname-pattern + (format "\\([ .]\\|\\`\\)[\"'`\[\(]\\([ .%s]+\\)[\]\"'\)] " + mail-all-letters)) + +;; Matches a leading title that is not part of the name (does not +;; contribute to uniquely identifying the person). +(defconst mail-full-name-prefixes + '"\\` *\\(Prof\\|Dr\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.? ") + +;; Matches the occurrence of a generational name suffix, and the last +;; character of the preceding name. +(defconst mail-full-name-suffix-pattern + (format + "\\(,? ?\\([JjSs]r\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)" + mail-all-letters mail-all-letters)) + +(defconst mail-roman-numeral-pattern + "V?I+V?\\b") + +;; Matches a trailing uppercase (with other characters possible) acronym. +;; Must not match a trailing uppercase last name or trailing initial +(defconst mail-weird-acronym-pattern "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)") + +;; Matches a mixed-case or lowercase name (not an initial). +(defconst mail-mixed-case-name-pattern + (format + "\\b\\([a-z][%s]*[%s]\\|[%s][%s]*[a-z][%s]*[%s]\\|[%s][%s]*[a-z]\\)" + mail-all-letters mail-last-letters + mail-first-letters mail-all-letters mail-all-letters mail-last-letters + mail-first-letters mail-all-letters)) + +;; Matches a trailing alternative address. +(defconst mail-alternative-address-pattern "[a-zA-Z.]+[!@][a-zA-Z.]") + +;; Matches a variety of trailing comments not including comma-delimited +;; comments. +(defconst mail-trailing-comment-start-pattern " [-{]\\|--\\|[+@#></\;]") + +;; Matches a name (not an initial). +;; This doesn't force a word boundary at the end because sometimes a +;; comment is separated by a `-' with no preceding space. +(defconst mail-name-pattern + (format + "\\b[%s][%s]*[%s]" + mail-first-letters mail-all-letters mail-last-letters)) + +(defconst mail-initial-pattern + (format "\\b[%s]\\([. ]\\|\\b\\)" mail-first-letters)) + +;; Matches a single name before a comma. +(defconst mail-last-name-first-pattern + (concat "\\`" mail-name-pattern ",")) + +;; Matches telephone extensions. +(defconst mail-telephone-extension-pattern + "\\(\\([Ee]xt\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+") + +;; Matches ham radio call signs. +(defconst mail-ham-call-sign-pattern + "\\b[A-Z]+[0-9][A-Z0-9]*") + +;; Matches normal single-part name +(defconst mail-normal-name-pattern + (format + "\\b[%s][%s]+[%s]" + mail-first-letters mail-all-letters-but-separators mail-last-letters)) + +;; Matches normal two names with missing middle initial +(defconst mail-two-name-pattern + (concat "\\`\\(" mail-normal-name-pattern + "\\|" mail-initial-pattern + "\\) +\\(" mail-normal-name-pattern "\\)\\(,\\|\\'\\)")) + +(defvar address-syntax-table (make-syntax-table)) +(defvar address-comment-syntax-table (make-syntax-table)) +(defvar address-domain-literal-syntax-table (make-syntax-table)) +(defvar address-text-comment-syntax-table (make-syntax-table)) +(defvar address-text-syntax-table (make-syntax-table)) +(mapcar + (function + (lambda (pair) + (let ((syntax-table (symbol-value (car pair)))) + (mapcar + (function + (lambda (item) + (if (eq 2 (length item)) + (modify-syntax-entry (car item) (car (cdr item)) syntax-table) + (let ((char (car item)) + (bound (car (cdr item))) + (syntax (car (cdr (cdr item))))) + (while (<= char bound) + (modify-syntax-entry char syntax syntax-table) + (setq char (1+ char))))))) + (cdr pair))))) + '((address-syntax-table + (0 31 "w") ;control characters + (32 " ") ;SPC + (?! ?~ "w") ;printable characters + (127 "w") ;DEL + (128 255 "w") ;high-bit-on characters + (?\t " ") + (?\r " ") + (?\n " ") + (?\( ".") + (?\) ".") + (?< ".") + (?> ".") + (?@ ".") + (?, ".") + (?\; ".") + (?: ".") + (?\\ "\\") + (?\" "\"") + (?. ".") + (?\[ ".") + (?\] ".") + ;; % and ! aren't RFC822 characters, but it is convenient to pretend + (?% ".") + (?! ".") + ) + (address-comment-syntax-table + (0 255 "w") + (?\( "\(\)") + (?\) "\)\(") + (?\\ "\\")) + (address-domain-literal-syntax-table + (0 255 "w") + (?\[ "\(\]") ;?????? + (?\] "\)\[") ;?????? + (?\\ "\\")) + (address-text-comment-syntax-table + (0 255 "w") + (?\( "\(\)") + (?\) "\)\(") + (?\[ "\(\]") + (?\] "\)\[") + (?\{ "\(\}") + (?\} "\)\{") + (?\\ "\\") + (?\" "\"") + ;; (?\' "\)\`") + ;; (?\` "\(\'") + ) + (address-text-syntax-table + (0 255 ".") + (?A ?Z "w") + (?a ?z "w") + (?- "w") + (?\} "w") + (?\{ "w") + (?| "w") + (?\' "w") + (?~ "w") + (?0 ?9 "w")) + )) + + +;; Utility functions and macros. + +(defmacro undo-backslash-quoting (beg end) + (`(save-excursion + (save-restriction + (narrow-to-region (, beg) (, end)) + (goto-char (point-min)) + ;; undo \ quoting + (while (re-search-forward "\\\\\\(.\\)" nil t) + (replace-match "\\1") + ;; CHECK: does this leave point after the replacement? + ))))) + +(defmacro mail-nuke-char-at (pos) + (` (save-excursion + (goto-char (, pos)) + (delete-char 1) + (insert mail-space-char)))) + +(defmacro mail-nuke-elements-outside-range (list-symbol beg-symbol end-symbol + &optional no-replace) + (` (progn + (setq temp (, list-symbol)) + (while temp + (cond ((or (> (car temp) (, end-symbol)) + (< (car temp) (, beg-symbol))) + (, (or no-replace + (` (mail-nuke-char-at (car temp))))) + (setcar temp nil))) + (setq temp (cdr temp))) + (setq (, list-symbol) (delq nil (, list-symbol)))))) + +(defun mail-demarkerize (marker) + (and marker + (if (markerp marker) + (let ((temp (marker-position marker))) + (set-marker marker nil) + temp) + marker))) + +(defun mail-markerize (pos) + (and pos + (if (markerp pos) + pos + (copy-marker pos)))) + +(defmacro mail-last-element (list) + "Return last element of LIST." + (` (let ((list (, list))) + (while (not (null (cdr list))) + (setq list (cdr list))) + (car list)))) + +(defmacro safe-move-sexp (arg) + "Safely skip over one balanced sexp, if there is one. Return t if success." + (` (condition-case error + (progn + (goto-char (scan-sexps (point) (, arg))) + t) + (error + (if (string-equal (nth 1 error) "Unbalanced parentheses") + nil + (while t + (signal (car error) (cdr error)))))))) + + +;; The main function to grind addresses + +(defun mail-extract-address-components (address) + "Given an rfc 822 ADDRESS, extract full name and canonical address. +Returns a list of the form (FULL-NAME CANONICAL-ADDRESS)." + (let ((canonicalization-buffer (get-buffer-create "*canonical address*")) + (extraction-buffer (get-buffer-create "*extract address components*")) + (foo 'bar) + char + multiple-addresses + <-pos >-pos @-pos :-pos ,-pos !-pos %-pos \;-pos + group-:-pos group-\;-pos route-addr-:-pos + record-pos-symbol + first-real-pos last-real-pos + phrase-beg phrase-end + comment-beg comment-end + quote-beg quote-end + atom-beg atom-end + mbox-beg mbox-end + \.-ends-name + temp + name-suffix + saved-point + fi mi li + saved-%-pos saved-!-pos saved-@-pos + domain-pos \.-pos insert-point) + + (save-excursion + (set-buffer extraction-buffer) + (buffer-flush-undo extraction-buffer) + (set-syntax-table address-syntax-table) + (widen) + (erase-buffer) + (setq case-fold-search nil) + + ;; Insert extra space at beginning to allow later replacement with < + ;; without having to move markers. + (insert mail-space-char address) + + ;; stolen from rfc822.el + ;; Unfold multiple lines. + (goto-char (point-min)) + (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) + (replace-match "\\1 " t)) + + ;; first pass grabs useful information about address + (goto-char (point-min)) + (while (progn + (skip-chars-forward mail-whitespace) + (not (eobp))) + (setq char (char-after (point))) + (or first-real-pos + (if (not (eq char ?\()) + (setq first-real-pos (point)))) + (cond + ;; comment + ((eq char ?\() + (set-syntax-table address-comment-syntax-table) + ;; only record the first non-empty comment's position + (if (and (not comment-beg) + (save-excursion + (forward-char 1) + (skip-chars-forward mail-whitespace) + (not (eq ?\) (char-after (point)))))) + (setq comment-beg (point))) + ;; TODO: don't record if unbalanced + (or (safe-move-sexp 1) + (forward-char 1)) + (set-syntax-table address-syntax-table) + (if (and comment-beg + (not comment-end)) + (setq comment-end (point)))) + ;; quoted text + ((eq char ?\") + ;; only record the first non-empty quote's position + (if (and (not quote-beg) + (save-excursion + (forward-char 1) + (skip-chars-forward mail-whitespace) + (not (eq ?\" (char-after (point)))))) + (setq quote-beg (point))) + ;; TODO: don't record if unbalanced + (or (safe-move-sexp 1) + (forward-char 1)) + (if (and quote-beg + (not quote-end)) + (setq quote-end (point)))) + ;; domain literals + ((eq char ?\[) + (set-syntax-table address-domain-literal-syntax-table) + (or (safe-move-sexp 1) + (forward-char 1)) + (set-syntax-table address-syntax-table)) + ;; commas delimit addresses when outside < > pairs. + ((and (eq char ?,) + (or (null <-pos) + (and >-pos + ;; handle weird munged addresses + (> (mail-last-element <-pos) (car >-pos))))) + (setq multiple-addresses t) + (delete-char 1) + (narrow-to-region (point-min) (point))) + ;; record the position of various interesting chars, determine + ;; legality later. + ((setq record-pos-symbol + (cdr (assq char + '((?< . <-pos) (?> . >-pos) (?@ . @-pos) + (?: . :-pos) (?, . ,-pos) (?! . !-pos) + (?% . %-pos) (?\; . \;-pos))))) + (set record-pos-symbol + (cons (point) (symbol-value record-pos-symbol))) + (forward-char 1)) + ((eq char ?.) + (forward-char 1)) + ((memq char '( + ;; comment terminator illegal + ?\) + ;; domain literal terminator illegal + ?\] + ;; \ allowed only within quoted strings, + ;; domain literals, and comments + ?\\ + )) + (mail-nuke-char-at (point)) + (forward-char 1)) + (t + (forward-word 1))) + (or (eq char ?\() + (setq last-real-pos (point)))) + + ;; Use only the leftmost <, if any. Replace all others with spaces. + (while (cdr <-pos) + (mail-nuke-char-at (car <-pos)) + (setq <-pos (cdr <-pos))) + + ;; Use only the rightmost >, if any. Replace all others with spaces. + (while (cdr >-pos) + (mail-nuke-char-at (nth 1 >-pos)) + (setcdr >-pos (nthcdr 2 >-pos))) + + ;; If multiple @s and a :, but no < and >, insert around buffer. + ;; This commonly happens on the UUCP "From " line. Ugh. + (cond ((and (> (length @-pos) 1) + :-pos ;TODO: check if between @s + (not <-pos)) + (goto-char (point-min)) + (delete-char 1) + (setq <-pos (list (point))) + (insert ?<))) + + ;; If < but no >, insert > in rightmost possible position + (cond ((and <-pos + (null >-pos)) + (goto-char (point-max)) + (setq >-pos (list (point))) + (insert ?>))) + + ;; If > but no <, replace > with space. + (cond ((and >-pos + (null <-pos)) + (mail-nuke-char-at (car >-pos)) + (setq >-pos nil))) + + ;; Turn >-pos and <-pos into non-lists + (setq >-pos (car >-pos) + <-pos (car <-pos)) + + ;; Trim other punctuation lists of items outside < > pair to handle + ;; stupid MTAs. + (cond (<-pos ; don't need to check >-pos also + ;; handle bozo software that violates RFC 822 by sticking + ;; punctuation marks outside of a < > pair + (mail-nuke-elements-outside-range @-pos <-pos >-pos t) + ;; RFC 822 says nothing about these two outside < >, but + ;; remove those positions from the lists to make things + ;; easier. + (mail-nuke-elements-outside-range !-pos <-pos >-pos t) + (mail-nuke-elements-outside-range %-pos <-pos >-pos t))) + + ;; Check for : that indicates GROUP list and for : part of + ;; ROUTE-ADDR spec. + ;; Can't possibly be more than two :. Nuke any extra. + (while :-pos + (setq temp (car :-pos) + :-pos (cdr :-pos)) + (cond ((and <-pos >-pos + (> temp <-pos) + (< temp >-pos)) + (if (or route-addr-:-pos + (< (length @-pos) 2) + (> temp (car @-pos)) + (< temp (nth 1 @-pos))) + (mail-nuke-char-at temp) + (setq route-addr-:-pos temp))) + ((or (not <-pos) + (and <-pos + (< temp <-pos))) + (setq group-:-pos temp)))) + + ;; Nuke any ; that is in or to the left of a < > pair or to the left + ;; of a GROUP starting :. Also, there may only be one ;. + (while \;-pos + (setq temp (car \;-pos) + \;-pos (cdr \;-pos)) + (cond ((and <-pos >-pos + (> temp <-pos) + (< temp >-pos)) + (mail-nuke-char-at temp)) + ((and (or (not group-:-pos) + (> temp group-:-pos)) + (not group-\;-pos)) + (setq group-\;-pos temp)))) + + ;; Handle junk like ";@host.company.dom" that sendmail adds. + ;; **** should I remember comment positions? + (and group-\;-pos + ;; this is fine for now + (mail-nuke-elements-outside-range !-pos group-:-pos group-\;-pos t) + (mail-nuke-elements-outside-range @-pos group-:-pos group-\;-pos t) + (mail-nuke-elements-outside-range %-pos group-:-pos group-\;-pos t) + (mail-nuke-elements-outside-range ,-pos group-:-pos group-\;-pos t) + (and last-real-pos + (> last-real-pos (1+ group-\;-pos)) + (setq last-real-pos (1+ group-\;-pos))) + (and comment-end + (> comment-end group-\;-pos) + (setq comment-end nil + comment-beg nil)) + (and quote-end + (> quote-end group-\;-pos) + (setq quote-end nil + quote-beg nil)) + (narrow-to-region (point-min) group-\;-pos)) + + ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any + ;; others. + ;; Hell, go ahead an nuke all of the commas. + ;; **** This will cause problems when we start handling commas in + ;; the PHRASE part .... no it won't ... yes it will ... ????? + (mail-nuke-elements-outside-range ,-pos 1 1) + + ;; can only have multiple @s inside < >. The fact that some MTAs + ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is + ;; handled above. + + ;; Locate PHRASE part of ROUTE-ADDR. + (cond (<-pos + (goto-char <-pos) + (skip-chars-backward mail-whitespace) + (setq phrase-end (point)) + (goto-char (or ;;group-:-pos + (point-min))) + (skip-chars-forward mail-whitespace) + (if (< (point) phrase-end) + (setq phrase-beg (point)) + (setq phrase-end nil)))) + + ;; handle ROUTE-ADDRS with real ROUTEs. + ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and + ;; any % or ! must be semantically meaningless. + ;; TODO: do this processing into canonicalization buffer + (cond (route-addr-:-pos + (setq !-pos nil + %-pos nil + >-pos (copy-marker >-pos) + route-addr-:-pos (copy-marker route-addr-:-pos)) + (goto-char >-pos) + (insert-before-markers ?X) + (goto-char (car @-pos)) + (while (setq @-pos (cdr @-pos)) + (delete-char 1) + (setq %-pos (cons (point-marker) %-pos)) + (insert "%") + (goto-char (1- >-pos)) + (save-excursion + (insert-buffer-substring extraction-buffer + (car @-pos) route-addr-:-pos) + (delete-region (car @-pos) route-addr-:-pos)) + (or (cdr @-pos) + (setq saved-@-pos (list (point))))) + (setq @-pos saved-@-pos) + (goto-char >-pos) + (delete-char -1) + (mail-nuke-char-at route-addr-:-pos) + (mail-demarkerize route-addr-:-pos) + (setq route-addr-:-pos nil + >-pos (mail-demarkerize >-pos) + %-pos (mapcar 'mail-demarkerize %-pos)))) + + ;; de-listify @-pos + (setq @-pos (car @-pos)) + + ;; TODO: remove comments in the middle of an address + + (set-buffer canonicalization-buffer) + + (buffer-flush-undo canonicalization-buffer) + (set-syntax-table address-syntax-table) + (setq case-fold-search nil) + + (widen) + (erase-buffer) + (insert-buffer-substring extraction-buffer) + + (if <-pos + (narrow-to-region (progn + (goto-char (1+ <-pos)) + (skip-chars-forward mail-whitespace) + (point)) + >-pos) + ;; ****** Oh no! What if the address is completely empty! + (narrow-to-region first-real-pos last-real-pos)) + + (and @-pos %-pos + (mail-nuke-elements-outside-range %-pos (point-min) @-pos)) + (and %-pos !-pos + (mail-nuke-elements-outside-range !-pos (point-min) (car %-pos))) + (and @-pos !-pos (not %-pos) + (mail-nuke-elements-outside-range !-pos (point-min) @-pos)) + + ;; Error condition:?? (and %-pos (not @-pos)) + + (cond (!-pos + ;; **** I don't understand this save-restriction and the + ;; narrow-to-region inside it. Why did I do that? + (save-restriction + (cond ((and @-pos + mail-@-binds-tighter-than-!) + (goto-char @-pos) + (setq %-pos (cons (point) %-pos) + @-pos nil) + (delete-char 1) + (insert "%") + (setq insert-point (point-max))) + (mail-@-binds-tighter-than-! + (setq insert-point (point-max))) + (%-pos + (setq insert-point (mail-last-element %-pos) + saved-%-pos (mapcar 'mail-markerize %-pos) + %-pos nil + @-pos (mail-markerize @-pos))) + (@-pos + (setq insert-point @-pos) + (setq @-pos (mail-markerize @-pos))) + (t + (setq insert-point (point-max)))) + (narrow-to-region (point-min) insert-point) + (setq saved-!-pos (car !-pos)) + (while !-pos + (goto-char (point-max)) + (cond ((and (not @-pos) + (not (cdr !-pos))) + (setq @-pos (point)) + (insert-before-markers "@ ")) + (t + (setq %-pos (cons (point) %-pos)) + (insert-before-markers "% "))) + (backward-char 1) + (insert-buffer-substring + (current-buffer) + (if (nth 1 !-pos) + (1+ (nth 1 !-pos)) + (point-min)) + (car !-pos)) + (delete-char 1) + (or (save-excursion + (safe-move-sexp -1) + (skip-chars-backward mail-whitespace) + (eq ?. (preceding-char))) + (insert-before-markers + (if (save-excursion + (skip-chars-backward mail-whitespace) + (eq ?. (preceding-char))) + "" + ".") + "uucp")) + (setq !-pos (cdr !-pos)))) + (and saved-%-pos + (setq %-pos (append (mapcar 'mail-demarkerize saved-%-pos) + %-pos))) + (setq @-pos (mail-demarkerize @-pos)) + (narrow-to-region (1+ saved-!-pos) (point-max)))) + (cond ((and %-pos + (not @-pos)) + (goto-char (car %-pos)) + (delete-char 1) + (setq @-pos (point)) + (insert "@") + (setq %-pos (cdr %-pos)))) + (setq %-pos (nreverse %-pos)) + ;; RFC 1034 doesn't approve of this, oh well: + (downcase-region (or (car %-pos) @-pos (point-max)) (point-max)) + (cond (%-pos ; implies @-pos valid + (setq temp %-pos) + (catch 'truncated + (while temp + (goto-char (or (nth 1 temp) + @-pos)) + (skip-chars-backward mail-whitespace) + (save-excursion + (safe-move-sexp -1) + (setq domain-pos (point)) + (skip-chars-backward mail-whitespace) + (setq \.-pos (eq ?. (preceding-char)))) + (cond ((and \.-pos + (get + (intern + (buffer-substring domain-pos (point))) + 'domain-name)) + (narrow-to-region (point-min) (point)) + (goto-char (car temp)) + (delete-char 1) + (setq @-pos (point)) + (setcdr temp nil) + (setq %-pos (delq @-pos %-pos)) + (insert "@") + (throw 'truncated t))) + (setq temp (cdr temp)))))) + (setq mbox-beg (point-min) + mbox-end (if %-pos (car %-pos) + (or @-pos + (point-max)))) + + ;; Done canonicalizing address. + + (set-buffer extraction-buffer) + + ;; Find the full name + + (cond ((and phrase-beg + (eq quote-beg phrase-beg) + (<= quote-end phrase-end)) + (narrow-to-region (1+ quote-beg) (1- quote-end)) + (undo-backslash-quoting (point-min) (point-max))) + (phrase-beg + (narrow-to-region phrase-beg phrase-end)) + (comment-beg + (narrow-to-region (1+ comment-beg) (1- comment-end)) + (undo-backslash-quoting (point-min) (point-max))) + (t + ;; *** Work in canon buffer instead? No, can't. Hmm. + (delete-region (point-min) (point-max)) + (insert-buffer-substring canonicalization-buffer + mbox-beg mbox-end) + (goto-char (point-min)) + (setq \.-ends-name (search-forward "_" nil t)) + (goto-char (point-min)) + (while (progn + (skip-chars-forward mail-whitespace) + (not (eobp))) + (setq char (char-after (point))) + (cond + ((eq char ?\") + (setq quote-beg (point)) + (or (safe-move-sexp 1) + ;; TODO: handle this error condition!!!!! + (forward-char 1)) + ;; take into account deletions + (setq quote-end (- (point) 2)) + (save-excursion + (backward-char 1) + (delete-char 1) + (goto-char quote-beg) + (delete-char 1)) + (undo-backslash-quoting quote-beg quote-end) + (or (eq mail-space-char (char-after (point))) + (insert " ")) + (setq \.-ends-name t)) + ((eq char ?.) + (if (eq (char-after (1+ (point))) ?_) + (progn + (forward-char 1) + (delete-char 1) + (insert mail-space-char)) + (if \.-ends-name + (narrow-to-region (point-min) (point)) + (delete-char 1) + (insert " ")))) + ((memq (char-syntax char) '(?. ?\\)) + (delete-char 1) + (insert " ")) + (t + (setq atom-beg (point)) + (forward-word 1) + (setq atom-end (point)) + (save-restriction + (narrow-to-region atom-beg atom-end) + (goto-char (point-min)) + (while (re-search-forward "\\([^_]+\\)_" nil t) + (replace-match "\\1 ")) + (goto-char (point-max)))))))) + + (set-syntax-table address-text-syntax-table) + + (setq xxx (variant-method (buffer-string))) + (delete-region (point-min) (point-max)) + (insert xxx) + (goto-char (point-min)) + +;; ;; Compress whitespace +;; (goto-char (point-min)) +;; (while (re-search-forward "[ \t\n]+" nil t) +;; (replace-match " ")) +;; +;; ;; Fix . used as space +;; (goto-char (point-min)) +;; (while (re-search-forward mail-bad-\.-pattern nil t) +;; (replace-match "\\1 \\2")) +;; +;; ;; Delete trailing parenthesized comment +;; (goto-char (point-max)) +;; (skip-chars-backward mail-whitespace) +;; (cond ((memq (char-after (1- (point))) '(?\) ?\} ?\])) +;; (setq comment-end (point)) +;; (set-syntax-table address-text-comment-syntax-table) +;; (or (safe-move-sexp -1) +;; (backward-char 1)) +;; (set-syntax-table address-text-syntax-table) +;; (setq comment-beg (point)) +;; (skip-chars-backward mail-whitespace) +;; (if (bobp) +;; (narrow-to-region (1+ comment-beg) (1- comment-end)) +;; (narrow-to-region (point-min) (point))))) +;; +;; ;; Find, save, and delete any name suffix +;; ;; *** Broken! +;; (goto-char (point-min)) +;; (cond ((re-search-forward mail-full-name-suffix-pattern nil t) +;; (setq name-suffix (buffer-substring (match-beginning 3) +;; (match-end 3))) +;; (replace-match "\\1 \\4"))) +;; +;; ;; Delete ALL CAPS words and after, if preceded by mixed-case or +;; ;; lowercase words. Eg. XT-DEM. +;; (goto-char (point-min)) +;; ;; ## This will lose on something like "SMITH MAX". +;; ;; ## maybe it should be +;; ;; ## " \\([A-Z]+[-_/][A-Z]+\\|[A-Z][A-Z][A-Z]\\)\\b.*[^A-Z \t]" +;; ;; ## that is, three-letter-upper-case-word with non-upper-case +;; ;; ## characters following it. +;; (if (re-search-forward mail-mixed-case-name-pattern nil t) +;; (if (re-search-forward mail-weird-acronym-pattern nil t) +;; (narrow-to-region (point-min) (match-beginning 0)))) +;; +;; ;; Delete trailing alternative address +;; (goto-char (point-min)) +;; (if (re-search-forward mail-alternative-address-pattern nil t) +;; (narrow-to-region (point-min) (match-beginning 0))) +;; +;; ;; Delete trailing comment +;; (goto-char (point-min)) +;; (if (re-search-forward mail-trailing-comment-start-pattern nil t) +;; (or (progn +;; (goto-char (match-beginning 0)) +;; (skip-chars-backward mail-whitespace) +;; (bobp)) +;; (narrow-to-region (point-min) (match-beginning 0)))) +;; +;; ;; Delete trailing comma-separated comment +;; (goto-char (point-min)) +;; ;; ## doesn't this break "Smith, John"? Yes. +;; (re-search-forward mail-last-name-first-pattern nil t) +;; (while (search-forward "," nil t) +;; (or (save-excursion +;; (backward-char 2) +;; (looking-at mail-full-name-suffix-pattern)) +;; (narrow-to-region (point-min) (1- (point))))) +;; +;; ;; Delete telephone numbers and ham radio call signs +;; (goto-char (point-min)) +;; (if (re-search-forward mail-telephone-extension-pattern nil t) +;; (narrow-to-region (point-min) (match-beginning 0))) +;; (goto-char (point-min)) +;; (if (re-search-forward mail-ham-call-sign-pattern nil t) +;; (if (eq (match-beginning 0) (point-min)) +;; (narrow-to-region (match-end 0) (point-max)) +;; (narrow-to-region (point-min) (match-beginning 0)))) +;; +;; ;; Delete trailing word followed immediately by . +;; (goto-char (point-min)) +;; ;; ## what's this for? doesn't it mess up "Public, Harry Q."? No. +;; (if (re-search-forward "\\b[A-Za-z][A-Za-z]+\\. *\\'" nil t) +;; (narrow-to-region (point-min) (match-beginning 0))) +;; +;; ;; Handle & substitution +;; ;; TODO: remember to disable middle initial guessing +;; (goto-char (point-min)) +;; (cond ((re-search-forward "\\( \\|\\`\\)&\\( \\|\\'\\)" nil t) +;; (goto-char (match-end 1)) +;; (delete-char 1) +;; (capitalize-region +;; (point) +;; (progn +;; (insert-buffer-substring canonicalization-buffer +;; mbox-beg mbox-end) +;; (point))))) +;; +;; ;; Delete nickname +;; (goto-char (point-min)) +;; (if (re-search-forward mail-nickname-pattern nil t) +;; (replace-match (if (eq (match-beginning 2) (1- (match-end 2))) +;; " \\2 " +;; " "))) +;; +;; ;; Fixup initials +;; (while (progn +;; (goto-char (point-min)) +;; (re-search-forward mail-bad-initials-pattern nil t)) +;; (replace-match +;; (if (match-beginning 4) +;; "\\1. \\4" +;; (if (match-beginning 5) +;; "\\1. \\5" +;; "\\1. ")))) +;; +;; ;; Delete title +;; (goto-char (point-min)) +;; (if (re-search-forward mail-full-name-prefixes nil t) +;; (narrow-to-region (point) (point-max))) +;; +;; ;; Delete trailing and preceding non-name characters +;; (goto-char (point-min)) +;; (skip-chars-forward mail-non-begin-name-chars) +;; (narrow-to-region (point) (point-max)) +;; (goto-char (point-max)) +;; (skip-chars-backward mail-non-end-name-chars) +;; (narrow-to-region (point-min) (point)) + + ;; If name is "First Last" and userid is "F?L", then assume + ;; the middle initial is the second letter in the userid. + ;; initially by Jamie Zawinski <jwz@lucid.com> + (cond ((and (eq 3 (- mbox-end mbox-beg)) + (progn + (goto-char (point-min)) + (looking-at mail-two-name-pattern))) + (setq fi (char-after (match-beginning 0)) + li (char-after (match-beginning 3))) + (save-excursion + (set-buffer canonicalization-buffer) + ;; char-equal is ignoring case here, so no need to upcase + ;; or downcase. + (let ((case-fold-search t)) + (and (char-equal fi (char-after mbox-beg)) + (char-equal li (char-after (1- mbox-end))) + (setq mi (char-after (1+ mbox-beg)))))) + (cond ((and mi + ;; TODO: use better table than syntax table + (eq ?w (char-syntax mi))) + (goto-char (match-beginning 3)) + (insert (upcase mi) ". "))))) + +;; ;; Restore suffix +;; (cond (name-suffix +;; (goto-char (point-max)) +;; (insert ", " name-suffix) +;; (backward-word 1) +;; (cond ((memq (following-char) '(?j ?J ?s ?S)) +;; (capitalize-word 1) +;; (or (eq (following-char) ?.) +;; (insert ?.))) +;; (t +;; (upcase-word 1))))) + + ;; Result + (list (buffer-string) + (progn + (set-buffer canonicalization-buffer) + (buffer-string))) + ))) + +;; TODO: put this back in the above function now that it's proven: +(defun variant-method (string) + (let ((variant-buffer (get-buffer-create "*variant method buffer*")) + (word-count 0) + mixed-case-flag lower-case-flag upper-case-flag + suffix-flag last-name-comma-flag + comment-beg comment-end initial beg end + ) + (save-excursion + (set-buffer variant-buffer) + (buffer-flush-undo variant-buffer) + (set-syntax-table address-text-syntax-table) + (widen) + (erase-buffer) + (setq case-fold-search nil) + + (insert string) + + ;; Fix . used as space + (goto-char (point-min)) + (while (re-search-forward mail-bad-\.-pattern nil t) + (replace-match "\\1 \\2")) + + ;; Skip any initial garbage. + (goto-char (point-min)) + (skip-chars-forward mail-non-begin-name-chars) + (skip-chars-backward "& \"") + (narrow-to-region (point) (point-max)) + + (catch 'stop + (while t + (skip-chars-forward mail-whitespace) + + (cond + + ;; Delete title + ((and (eq word-count 0) + (looking-at mail-full-name-prefixes)) + (goto-char (match-end 0)) + (narrow-to-region (point) (point-max))) + + ;; Stop after name suffix + ((and (>= word-count 2) + (looking-at mail-full-name-suffix-pattern)) + (skip-chars-backward mail-whitespace) + (setq suffix-flag (point)) + (if (eq ?, (following-char)) + (forward-char 1) + (insert ?,)) + ;; Enforce at least one space after comma + (or (eq mail-space-char (following-char)) + (insert mail-space-char)) + (skip-chars-forward mail-whitespace) + (cond ((memq (following-char) '(?j ?J ?s ?S)) + (capitalize-word 1) + (if (eq (following-char) ?.) + (forward-char 1) + (insert ?.))) + (t + (upcase-word 1))) + (setq word-count (1+ word-count)) + (throw 'stop t)) + + ;; Handle SCA names + ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As" + (setq word-count 0) + (goto-char (match-beginning 1)) + (narrow-to-region (point) (point-max))) + + ;; Various stopping points + ((or + ;; Stop before ALL CAPS acronyms, if preceded by mixed-case or + ;; lowercase words. Eg. XT-DEM. + (and (>= word-count 2) + (or mixed-case-flag lower-case-flag) + (looking-at mail-weird-acronym-pattern) + (not (looking-at mail-roman-numeral-pattern))) + ;; Stop before 4-or-more letter lowercase words preceded by + ;; mixed case or uppercase words. + (and (>= word-count 2) + (or upper-case-flag mixed-case-flag) + (looking-at "[a-z][a-z][a-z][a-z]+\\b")) + ;; Stop before trailing alternative address + (looking-at mail-alternative-address-pattern) + ;; Stop before trailing comment not introduced by comma + (looking-at mail-trailing-comment-start-pattern) + ;; Stop before telephone numbers + (looking-at mail-telephone-extension-pattern)) + (throw 'stop t)) + + ;; Check for initial last name followed by comma + ((and (eq ?, (following-char)) + (eq word-count 1)) + (forward-char 1) + (setq last-name-comma-flag t) + (or (eq mail-space-char (following-char)) + (insert mail-space-char))) + + ;; Stop before trailing comma-separated comment + ((eq ?, (following-char)) + (throw 'stop t)) + + ;; Delete parenthesized/quoted comment/nickname + ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`)) + (setq comment-beg (point)) + (set-syntax-table address-text-comment-syntax-table) + (cond ((memq (following-char) '(?\' ?\`)) + (if (eq ?\' (following-char)) + (forward-char 1)) + (or (search-forward "'" nil t) + (delete-char 1))) + (t + (or (safe-move-sexp 1) + (goto-char (point-max))))) + (set-syntax-table address-text-syntax-table) + (setq comment-end (point)) + (cond + ;; Handle case of entire name being quoted + ((and (eq word-count 0) + (looking-at " *\\'") + (>= (- comment-end comment-beg) 2)) + (narrow-to-region (1+ comment-beg) (1- comment-end)) + (goto-char (point-min))) + (t + ;; Handle case of quoted initial + (if (and (or (= 3 (- comment-end comment-beg)) + (and (= 4 (- comment-end comment-beg)) + (eq ?. (char-after (+ 2 comment-beg))))) + (not (looking-at " *\\'"))) + (setq initial (char-after (1+ comment-beg))) + (setq initial nil)) + (delete-region comment-beg comment-end) + (if initial + (insert initial ". "))))) + + ;; Delete ham radio call signs + ((looking-at mail-ham-call-sign-pattern) + (delete-region (match-beginning 0) (match-end 0))) + + ;; Handle & substitution + ;; TODO: remember to disable middle initial guessing + ((and (or (bobp) + (eq mail-space-char (preceding-char))) + (looking-at "&\\( \\|\\'\\)")) + (delete-char 1) + (capitalize-region + (point) + (progn + (insert-buffer-substring canonicalization-buffer + mbox-beg mbox-end) + (point)))) + + ;; Fixup initials + ((looking-at mail-initial-pattern) + (or (eq (following-char) (upcase (following-char))) + (setq lower-case-flag t)) + (forward-char 1) + (if (eq ?. (following-char)) + (forward-char 1) + (insert ?.)) + (or (eq mail-space-char (following-char)) + (insert mail-space-char)) + (setq word-count (1+ word-count))) + + ;; Regular name words + ((looking-at mail-name-pattern) + (setq beg (point)) + (setq end (match-end 0)) + (set (if (re-search-forward "[a-z]" end t) + (if (progn + (goto-char beg) + (re-search-forward "[A-Z]" end t)) + 'mixed-case-flag + 'lower-case-flag) + 'upper-case-flag) t) + (goto-char end) + (setq word-count (1+ word-count))) + + (t + (throw 'stop t))))) + + (narrow-to-region (point-min) (point)) + + ;; Delete trailing word followed immediately by . + (cond ((not suffix-flag) + (goto-char (point-min)) + (if (re-search-forward "\\b[A-Za-z][A-Za-z]+\\. *\\'" nil t) + (narrow-to-region (point-min) (match-beginning 0))))) + + ;; If last name first put it at end (but before suffix) + (cond (last-name-comma-flag + (goto-char (point-min)) + (search-forward ",") + (setq end (1- (point))) + (goto-char (or suffix-flag (point-max))) + (or (eq mail-space-char (preceding-char)) + (insert mail-space-char)) + (insert-buffer-substring (current-buffer) (point-min) end) + (narrow-to-region (1+ end) (point-max)))) + + (goto-char (point-max)) + (skip-chars-backward mail-non-end-name-chars) + (if (eq ?. (following-char)) + (forward-char 1)) + (narrow-to-region (point) + (progn + (goto-char (point-min)) + (skip-chars-forward mail-non-begin-name-chars) + (point))) + + ;; Compress whitespace + (goto-char (point-min)) + (while (re-search-forward "[ \t\n]+" nil t) + (replace-match " ")) + + (buffer-substring (point-min) (point-max)) + + ))) + +;; The country names are just in there for show right now, and because +;; Jamie thought it would be neat. They aren't used yet. + +;; Keep in mind that the country abbreviations follow ISO-3166. There is +;; a U.S. FIPS that specifies a different set of two-letter country +;; abbreviations. + +;; TODO: put this in its own obarray, instead of cluttering up the main +;; symbol table with junk. + +(mapcar + (function + (lambda (x) + (if (symbolp x) + (put x 'domain-name t) + (put (car x) 'domain-name (nth 1 x))))) + '((ag "Antigua") + (ar "Argentina") ; Argentine Republic + arpa ; Advanced Projects Research Agency + (at "Austria") ; The Republic of _ + (au "Australia") + (bb "Barbados") + (be "Belgium") ; The Kingdom of _ + (bg "Bulgaria") + bitnet ; Because It's Time NET + (bo "Bolivia") ; Republic of _ + (br "Brazil") ; The Federative Republic of _ + (bs "Bahamas") + (bz "Belize") + (ca "Canada") + (ch "Switzerland") ; The Swiss Confederation + (cl "Chile") ; The Republic of _ + (cn "China") ; The People's Republic of _ + (co "Columbia") + com ; Commercial + (cr "Costa Rica") ; The Republic of _ + (cs "Czechoslovakia") + (de "Germany") + (dk "Denmark") + (dm "Dominica") + (do "Dominican Republic") ; The _ + (ec "Ecuador") ; The Republic of _ + edu ; Educational + (eg "Egypt") ; The Arab Republic of _ + (es "Spain") ; The Kingdom of _ + (fi "Finland") ; The Republic of _ + (fj "Fiji") + (fr "France") + gov ; Government (U.S.A.) + (gr "Greece") ; The Hellenic Republic + (hk "Hong Kong") + (hu "Hungary") ; The Hungarian People's Republic (???) + (ie "Ireland") + (il "Israel") ; The State of _ + (in "India") ; The Republic of _ + int ; something British, don't know what + (is "Iceland") ; The Republic of _ + (it "Italy") ; The Italian Republic + (jm "Jamaica") + (jp "Japan") + (kn "St. Kitts and Nevis") + (kr "South Korea") + (lc "St. Lucia") + (lk "Sri Lanka") ; The Democratic Socialist Republic of _ + mil ; Military (U.S.A.) + (mx "Mexico") ; The United Mexican States + (my "Malaysia") ; changed to Myanmar???? + (na "Namibia") + nato ; North Atlantic Treaty Organization + net ; Network + (ni "Nicaragua") ; The Republic of _ + (nl "Netherlands") ; The Kingdom of the _ + (no "Norway") ; The Kingdom of _ + (nz "New Zealand") + org ; Organization + (pe "Peru") + (pg "Papua New Guinea") + (ph "Philippines") ; The Republic of the _ + (pl "Poland") + (pr "Puerto Rico") + (pt "Portugal") ; The Portugese Republic + (py "Paraguay") + (se "Sweden") ; The Kingdom of _ + (sg "Singapore") ; The Republic of _ + (sr "Suriname") + (su "Soviet Union") + (th "Thailand") ; The Kingdom of _ + (tn "Tunisia") + (tr "Turkey") ; The Republic of _ + (tt "Trinidad and Tobago") + (tw "Taiwan") + (uk "United Kingdom") ; The _ of Great Britain + unter-dom ; something German + (us "U.S.A.") ; The United States of America + uucp ; Unix to Unix CoPy + (uy "Uruguay") ; The Eastern Republic of _ + (vc "St. Vincent and the Grenadines") + (ve "Venezuela") ; The Republic of _ + (yu "Yugoslavia") ; The Socialist Federal Republic of _ + ;; Also said to be Zambia ... + (za "South Africa") ; The Republic of _ (why not Zaire???) + (zw "Zimbabwe") ; Republic of _ + )) +;; fipnet + + +;; Code for testing. + +(defun time-extract () + (let (times list) + (setq times (cons (current-time-string) times) + list problem-address-alist) + (while list + (mail-extract-address-components (car (car list))) + (setq list (cdr list))) + (setq times (cons (current-time-string) times)) + (nreverse times))) + +(defun test-extract (&optional starting-point) + (interactive) + (set-buffer (get-buffer-create "*Testing*")) + (erase-buffer) + (sit-for 0) + (mapcar 'test-extract-internal + (if starting-point + (memq starting-point problem-address-alist) + problem-address-alist))) + +(defvar failed-item) +(defun test-extract-internal (item) + (setq failed-item item) + (let* ((address (car item)) + (correct-name (nth 1 item)) + (correct-canon (nth 2 item)) + (result (mail-extract-address-components address)) + (name (car result)) + (canon (nth 1 result)) + (name-correct (or (null correct-name) + (string-equal (downcase correct-name) + (downcase name)))) + (canon-correct (or (null correct-canon) + (string-equal correct-canon canon)))) + (cond ((not (and name-correct canon-correct)) + (pop-to-buffer "*Testing*") + (select-window (get-buffer-window (current-buffer))) + (goto-char (point-max)) + (insert "Address: " address "\n") + (if (not name-correct) + (insert " Correct Name: [" correct-name + "]\; Result: [" name "]\n")) + (if (not canon-correct) + (insert " Correct Canon: [" correct-canon + "]\; Result: [" canon "]\n")) + (insert "\n") + (sit-for 0)))) + (setq failed-item nil)) + +(defun test-continue-extract () + (interactive) + (test-extract failed-item)) + + +;; Assorted junk. + +;; warsaw@nlm.nih.gov (A Bad Dude -- Barry Warsaw) + +;;'(from +;; reply-to +;; return-path +;; x-uucp-from +;; sender +;; resent-from +;; resent-sender +;; resent-reply-to) + +;;; mail-extr.el ends here