comparison lisp/mail/mail-extr.el @ 47939:5f47d61ffbdc

Fix typo in comment.
author Juanma Barranquero <lekktu@gmail.com>
date Fri, 18 Oct 2002 08:52:37 +0000
parents a298684644ca
children a4bb96832880 d7ddb3e565de
comparison
equal deleted inserted replaced
47938:65eaf21a0a44 47939:5f47d61ffbdc
27 ;;; Commentary: 27 ;;; Commentary:
28 28
29 ;; The entry point of this code is 29 ;; The entry point of this code is
30 ;; 30 ;;
31 ;; mail-extract-address-components: (address &optional all) 31 ;; mail-extract-address-components: (address &optional all)
32 ;; 32 ;;
33 ;; Given an RFC-822 ADDRESS, extract full name and canonical address. 33 ;; Given an RFC-822 ADDRESS, extract full name and canonical address.
34 ;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). 34 ;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
35 ;; If no name can be extracted, FULL-NAME will be nil. 35 ;; If no name can be extracted, FULL-NAME will be nil.
36 ;; ADDRESS may be a string or a buffer. If it is a buffer, the visible 36 ;; ADDRESS may be a string or a buffer. If it is a buffer, the visible
37 ;; (narrowed) portion of the buffer will be interpreted as the address. 37 ;; (narrowed) portion of the buffer will be interpreted as the address.
38 ;; (This feature exists so that the clever caller might be able to avoid 38 ;; (This feature exists so that the clever caller might be able to avoid
39 ;; consing a string.) 39 ;; consing a string.)
40 ;; If ADDRESS contains more than one RFC-822 address, only the first is 40 ;; If ADDRESS contains more than one RFC-822 address, only the first is
41 ;; returned. 41 ;; returned.
59 ;; We have an extensive test-case collection of funny addresses if you want to 59 ;; We have an extensive test-case collection of funny addresses if you want to
60 ;; work with the code. Developing this code requires frequent testing to 60 ;; work with the code. Developing this code requires frequent testing to
61 ;; make sure you're not breaking functionality. The test cases aren't included 61 ;; make sure you're not breaking functionality. The test cases aren't included
62 ;; because they are over 100K. 62 ;; because they are over 100K.
63 ;; 63 ;;
64 ;; If you find an address that mail-extr fails on, please send it to the 64 ;; If you find an address that mail-extr fails on, please send it to the
65 ;; maintainer along with what you think the correct results should be. We do 65 ;; maintainer along with what you think the correct results should be. We do
66 ;; not consider it a bug if mail-extr mangles a comment that does not 66 ;; not consider it a bug if mail-extr mangles a comment that does not
67 ;; correspond to a real human full name, although we would prefer that 67 ;; correspond to a real human full name, although we would prefer that
68 ;; mail-extr would return the comment as-is. 68 ;; mail-extr would return the comment as-is.
69 ;; 69 ;;
70 ;; Features: 70 ;; Features:
71 ;; 71 ;;
72 ;; * Full name handling: 72 ;; * Full name handling:
119 ;; * arrange for testing with different relative precedences of ! vs. @ 119 ;; * arrange for testing with different relative precedences of ! vs. @
120 ;; and %. 120 ;; and %.
121 ;; * insert documentation strings! 121 ;; * insert documentation strings!
122 ;; * handle X.400-gatewayed addresses according to RFC 1148. 122 ;; * handle X.400-gatewayed addresses according to RFC 1148.
123 123
124 ;;; Change Log: 124 ;;; Change Log:
125 ;; 125 ;;
126 ;; Thu Feb 17 17:57:33 1994 Jamie Zawinski (jwz@lucid.com) 126 ;; Thu Feb 17 17:57:33 1994 Jamie Zawinski (jwz@lucid.com)
127 ;; 127 ;;
128 ;; * merged with jbw's latest version 128 ;; * merged with jbw's latest version
129 ;; 129 ;;
130 ;; Wed Feb 9 21:56:27 1994 Jamie Zawinski (jwz@lucid.com) 130 ;; Wed Feb 9 21:56:27 1994 Jamie Zawinski (jwz@lucid.com)
138 ;; Thu Dec 16 21:56:45 1993 Jamie Zawinski (jwz@lucid.com) 138 ;; Thu Dec 16 21:56:45 1993 Jamie Zawinski (jwz@lucid.com)
139 ;; 139 ;;
140 ;; * some more cleanup, doc, added provide 140 ;; * some more cleanup, doc, added provide
141 ;; 141 ;;
142 ;; Tue Mar 23 21:23:18 1993 Joe Wells (jbw at csd.bu.edu) 142 ;; Tue Mar 23 21:23:18 1993 Joe Wells (jbw at csd.bu.edu)
143 ;; 143 ;;
144 ;; * Made mail-full-name-prefixes a user-customizable variable. 144 ;; * Made mail-full-name-prefixes a user-customizable variable.
145 ;; Allow passing the address as a buffer as well as a string. 145 ;; Allow passing the address as a buffer as well as a string.
146 ;; Allow [ and ] as name characters (Finnish character set). 146 ;; Allow [ and ] as name characters (Finnish character set).
147 ;; 147 ;;
148 ;; Mon Mar 22 21:20:56 1993 Joe Wells (jbw at bigbird.bu.edu) 148 ;; Mon Mar 22 21:20:56 1993 Joe Wells (jbw at bigbird.bu.edu)
149 ;; 149 ;;
150 ;; * Handle "null" addresses. Handle = used for spacing in mailbox 150 ;; * Handle "null" addresses. Handle = used for spacing in mailbox
151 ;; name. Fix bug in handling of ROUTE-ADDR-type addresses that are 151 ;; name. Fix bug in handling of ROUTE-ADDR-type addresses that are
152 ;; missing their brackets. Handle uppercase "JR". Extract full 152 ;; missing their brackets. Handle uppercase "JR". Extract full
153 ;; names from X.400 addresses encoded in RFC-822. Fix bug in 153 ;; names from X.400 addresses encoded in RFC-822. Fix bug in
154 ;; handling of multiple addresses where first has trailing comment. 154 ;; handling of multiple addresses where first has trailing comment.
155 ;; Handle more kinds of telephone extension lead-ins. 155 ;; Handle more kinds of telephone extension lead-ins.
156 ;; 156 ;;
157 ;; Mon Mar 22 20:16:57 1993 Joe Wells (jbw at bigbird.bu.edu) 157 ;; Mon Mar 22 20:16:57 1993 Joe Wells (jbw at bigbird.bu.edu)
158 ;; 158 ;;
159 ;; * Handle HZ encoding for embedding GB encoded chinese characters. 159 ;; * Handle HZ encoding for embedding GB encoded chinese characters.
160 ;; 160 ;;
161 ;; Mon Mar 22 00:46:12 1993 Joe Wells (jbw at bigbird.bu.edu) 161 ;; Mon Mar 22 00:46:12 1993 Joe Wells (jbw at bigbird.bu.edu)
162 ;; 162 ;;
163 ;; * Fixed too broad matching of ham radio call signs. Fixed bug in 163 ;; * Fixed too broad matching of ham radio call signs. Fixed bug in
164 ;; handling an unmatched ' in a name string. Enhanced recognition 164 ;; handling an unmatched ' in a name string. Enhanced recognition
165 ;; of when . in the mailbox name terminates the name portion. 165 ;; of when . in the mailbox name terminates the name portion.
166 ;; Narrowed conversion of . to space to only the necessary 166 ;; Narrowed conversion of . to space to only the necessary
167 ;; situation. Deal with VMS's stupid date stamps. Handle a unique 167 ;; situation. Deal with VMS's stupid date stamps. Handle a unique
168 ;; way of introducing an alternate address. Fixed spacing bug I 168 ;; way of introducing an alternate address. Fixed spacing bug I
169 ;; introduced in switching last name order. Fixed bug in handling 169 ;; introduced in switching last name order. Fixed bug in handling
170 ;; address with ! and % but no @. Narrowed the cases in which 170 ;; address with ! and % but no @. Narrowed the cases in which
171 ;; certain trailing words are discarded. 171 ;; certain trailing words are discarded.
172 ;; 172 ;;
173 ;; Sun Mar 21 21:41:06 1993 Joe Wells (jbw at bigbird.bu.edu) 173 ;; Sun Mar 21 21:41:06 1993 Joe Wells (jbw at bigbird.bu.edu)
174 ;; 174 ;;
175 ;; * Fixed bugs in handling GROUP addresses. Certain words in the 175 ;; * Fixed bugs in handling GROUP addresses. Certain words in the
176 ;; middle of a name no longer terminate it. Handle LISTSERV list 176 ;; middle of a name no longer terminate it. Handle LISTSERV list
177 ;; names. Ignore comment field containing mailbox name. 177 ;; names. Ignore comment field containing mailbox name.
178 ;; 178 ;;
179 ;; Sun Mar 21 14:39:38 1993 Joe Wells (jbw at bigbird.bu.edu) 179 ;; Sun Mar 21 14:39:38 1993 Joe Wells (jbw at bigbird.bu.edu)
180 ;; 180 ;;
181 ;; * Moved variant-method code back into main function. Handle 181 ;; * Moved variant-method code back into main function. Handle
182 ;; underscores as spaces in comments. Handle leading nickname. Add 182 ;; underscores as spaces in comments. Handle leading nickname. Add
183 ;; flag to ignore single-word names. Other changes. 183 ;; flag to ignore single-word names. Other changes.
184 ;; 184 ;;
185 ;; Mon Feb 1 22:23:31 1993 Joe Wells (jbw at bigbird.bu.edu) 185 ;; Mon Feb 1 22:23:31 1993 Joe Wells (jbw at bigbird.bu.edu)
186 ;; 186 ;;
187 ;; * Added in changes by Rod Whitby and Jamie Zawinski. This 187 ;; * Added in changes by Rod Whitby and Jamie Zawinski. This
188 ;; includes the flag mail-extr-guess-middle-initial and the fix for 188 ;; includes the flag mail-extr-guess-middle-initial and the fix for
189 ;; handling multiple addresses correctly. (Whitby just changed 189 ;; handling multiple addresses correctly. (Whitby just changed
190 ;; a > to a <.) 190 ;; a > to a <.)
191 ;; 191 ;;
192 ;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu) 192 ;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu)
193 ;; 193 ;;
194 ;; * Cleaned up some more. Release version 1.0 to world. 194 ;; * Cleaned up some more. Release version 1.0 to world.
195 ;; 195 ;;
196 ;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu) 196 ;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu)
197 ;; 197 ;;
198 ;; * Cleaned up full name extraction extensively. 198 ;; * Cleaned up full name extraction extensively.
199 ;; 199 ;;
200 ;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu) 200 ;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu)
201 ;; 201 ;;
202 ;; * Total rewrite. Integrated mail-canonicalize-address into 202 ;; * Total rewrite. Integrated mail-canonicalize-address into
203 ;; mail-extract-address-components. Now handles GROUP addresses more 203 ;; mail-extract-address-components. Now handles GROUP addresses more
204 ;; or less correctly. Better handling of lots of different cases. 204 ;; or less correctly. Better handling of lots of different cases.
205 ;; 205 ;;
206 ;; Fri Jun 14 19:39:50 1991 206 ;; Fri Jun 14 19:39:50 1991
207 ;; * Created. 207 ;; * Created.
208 208
209 ;;; Code: 209 ;;; Code:
210 210
316 ;; Keep this set as minimal as possible. 316 ;; Keep this set as minimal as possible.
317 (defconst mail-extr-last-letters (purecopy "[:alpha:]`'.")) 317 (defconst mail-extr-last-letters (purecopy "[:alpha:]`'."))
318 318
319 (defconst mail-extr-leading-garbage "\\W+") 319 (defconst mail-extr-leading-garbage "\\W+")
320 320
321 ;; (defconst mail-extr-non-name-chars 321 ;; (defconst mail-extr-non-name-chars
322 ;; (purecopy (concat "^" mail-extr-all-letters "."))) 322 ;; (purecopy (concat "^" mail-extr-all-letters ".")))
323 ;; (defconst mail-extr-non-begin-name-chars 323 ;; (defconst mail-extr-non-begin-name-chars
324 ;; (purecopy (concat "^" mail-extr-first-letters))) 324 ;; (purecopy (concat "^" mail-extr-first-letters)))
325 ;; (defconst mail-extr-non-end-name-chars 325 ;; (defconst mail-extr-non-end-name-chars
326 ;; (purecopy (concat "^" mail-extr-last-letters))) 326 ;; (purecopy (concat "^" mail-extr-last-letters)))
327 327
328 ;; Matches an initial not followed by both a period and a space. 328 ;; Matches an initial not followed by both a period and a space.
329 ;; (defconst mail-extr-bad-initials-pattern 329 ;; (defconst mail-extr-bad-initials-pattern
330 ;; (purecopy 330 ;; (purecopy
331 ;; (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)" 331 ;; (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)"
332 ;; mail-extr-all-letters mail-extr-first-letters mail-extr-all-letters))) 332 ;; mail-extr-all-letters mail-extr-first-letters mail-extr-all-letters)))
333 333
334 ;; Matches periods used instead of spaces. Must not match the period 334 ;; Matches periods used instead of spaces. Must not match the period
335 ;; following an initial. 335 ;; following an initial.
361 361
362 ;; Matches a trailing uppercase (with other characters possible) acronym. 362 ;; Matches a trailing uppercase (with other characters possible) acronym.
363 ;; Must not match a trailing uppercase last name or trailing initial 363 ;; Must not match a trailing uppercase last name or trailing initial
364 (defconst mail-extr-weird-acronym-pattern 364 (defconst mail-extr-weird-acronym-pattern
365 (purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)")) 365 (purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)"))
366 366
367 ;; Matches a mixed-case or lowercase name (not an initial). 367 ;; Matches a mixed-case or lowercase name (not an initial).
368 ;; #### Match Latin1 lower case letters here too? 368 ;; #### Match Latin1 lower case letters here too?
369 ;; (defconst mail-extr-mixed-case-name-pattern 369 ;; (defconst mail-extr-mixed-case-name-pattern
370 ;; (purecopy 370 ;; (purecopy
371 ;; (format 371 ;; (format
374 ;; mail-extr-first-letters mail-extr-all-letters mail-extr-all-letters 374 ;; mail-extr-first-letters mail-extr-all-letters mail-extr-all-letters
375 ;; mail-extr-last-letters mail-extr-first-letters mail-extr-all-letters))) 375 ;; mail-extr-last-letters mail-extr-first-letters mail-extr-all-letters)))
376 376
377 ;; Matches a trailing alternative address. 377 ;; Matches a trailing alternative address.
378 ;; #### Match Latin1 letters here too? 378 ;; #### Match Latin1 letters here too?
379 ;; #### Match _ before @ here too? 379 ;; #### Match _ before @ here too?
380 (defconst mail-extr-alternative-address-pattern 380 (defconst mail-extr-alternative-address-pattern
381 (purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]")) 381 (purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]"))
382 382
383 ;; Matches a variety of trailing comments not including comma-delimited 383 ;; Matches a variety of trailing comments not including comma-delimited
384 ;; comments. 384 ;; comments.
433 mail-extr-last-letters))) 433 mail-extr-last-letters)))
434 434
435 ;; Matches a single word name. 435 ;; Matches a single word name.
436 ;; (defconst mail-extr-one-name-pattern 436 ;; (defconst mail-extr-one-name-pattern
437 ;; (purecopy (concat "\\`" mail-extr-normal-name-pattern "\\'"))) 437 ;; (purecopy (concat "\\`" mail-extr-normal-name-pattern "\\'")))
438 438
439 ;; Matches normal two names with missing middle initial 439 ;; Matches normal two names with missing middle initial
440 ;; The first name is not allowed to have a hyphen because this can cause 440 ;; The first name is not allowed to have a hyphen because this can cause
441 ;; false matches where the "middle initial" is actually the first letter 441 ;; false matches where the "middle initial" is actually the first letter
442 ;; of the second part of the first name. 442 ;; of the second part of the first name.
443 (defconst mail-extr-two-name-pattern 443 (defconst mail-extr-two-name-pattern
457 ;; 457 ;;
458 ;; In ASCII mode, a byte is interpreted as an ASCII character, unless a '~' is 458 ;; In ASCII mode, a byte is interpreted as an ASCII character, unless a '~' is
459 ;; encountered. The character '~' is an escape character. By convention, it 459 ;; encountered. The character '~' is an escape character. By convention, it
460 ;; must be immediately followed ONLY by '~', '{' or '\n' (<LF>), with the 460 ;; must be immediately followed ONLY by '~', '{' or '\n' (<LF>), with the
461 ;; following special meaning. 461 ;; following special meaning.
462 ;; 462 ;;
463 ;; o The escape sequence '~~' is interpreted as a '~'. 463 ;; o The escape sequence '~~' is interpreted as a '~'.
464 ;; o The escape-to-GB sequence '~{' switches the mode from ASCII to GB. 464 ;; o The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
465 ;; o The escape sequence '~\n' is a line-continuation marker to be consumed 465 ;; o The escape sequence '~\n' is a line-continuation marker to be consumed
466 ;; with no output produced. 466 ;; with no output produced.
467 ;; 467 ;;
468 ;; In GB mode, characters are interpreted two bytes at a time as (pure) GB 468 ;; In GB mode, characters are interpreted two bytes at a time as (pure) GB
469 ;; codes until the escape-from-GB code '~}' is read. This code switches the 469 ;; codes until the escape-from-GB code '~}' is read. This code switches the
470 ;; mode from GB back to ASCII. (Note that the escape-from-GB code '~}' 470 ;; mode from GB back to ASCII. (Note that the escape-from-GB code '~}'
471 ;; ($7E7D) is outside the defined GB range.) 471 ;; ($7E7D) is outside the defined GB range.)
472 (defconst mail-extr-hz-embedded-gb-encoded-chinese-pattern 472 (defconst mail-extr-hz-embedded-gb-encoded-chinese-pattern
732 (buffer-disable-undo extraction-buffer) 732 (buffer-disable-undo extraction-buffer)
733 (set-syntax-table mail-extr-address-syntax-table) 733 (set-syntax-table mail-extr-address-syntax-table)
734 (widen) 734 (widen)
735 (erase-buffer) 735 (erase-buffer)
736 (setq case-fold-search nil) 736 (setq case-fold-search nil)
737 737
738 ;; Insert extra space at beginning to allow later replacement with < 738 ;; Insert extra space at beginning to allow later replacement with <
739 ;; without having to move markers. 739 ;; without having to move markers.
740 (insert ?\ ) 740 (insert ?\ )
741 741
742 ;; Insert the address itself. 742 ;; Insert the address itself.
752 (with-current-buffer (get-buffer-create canonicalization-buffer) 752 (with-current-buffer (get-buffer-create canonicalization-buffer)
753 (fundamental-mode) 753 (fundamental-mode)
754 (buffer-disable-undo canonicalization-buffer) 754 (buffer-disable-undo canonicalization-buffer)
755 (setq case-fold-search nil)) 755 (setq case-fold-search nil))
756 756
757 757
758 ;; Unfold multiple lines. 758 ;; Unfold multiple lines.
759 (goto-char (point-min)) 759 (goto-char (point-min))
760 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) 760 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
761 (replace-match "\\1 " t)) 761 (replace-match "\\1 " t))
762 762
763 ;; Loop over addresses until we have as many as we want. 763 ;; Loop over addresses until we have as many as we want.
764 (while (and (or all (null value-list)) 764 (while (and (or all (null value-list))
765 (progn (goto-char (point-min)) 765 (progn (goto-char (point-min))
766 (skip-chars-forward " \t") 766 (skip-chars-forward " \t")
767 (not (eobp)))) 767 (not (eobp))))
1010 ;; *** are hardly ever seen. 1010 ;; *** are hardly ever seen.
1011 ) 1011 )
1012 1012
1013 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any 1013 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
1014 ;; others. 1014 ;; others.
1015 ;; Hell, go ahead an nuke all of the commas. 1015 ;; Hell, go ahead and nuke all of the commas.
1016 ;; **** This will cause problems when we start handling commas in 1016 ;; **** This will cause problems when we start handling commas in
1017 ;; the PHRASE part .... no it won't ... yes it will ... ????? 1017 ;; the PHRASE part .... no it won't ... yes it will ... ?????
1018 (mail-extr-nuke-outside-range comma-pos 1 1) 1018 (mail-extr-nuke-outside-range comma-pos 1 1)
1019 1019
1020 ;; can only have multiple @s inside < >. The fact that some MTAs 1020 ;; can only have multiple @s inside < >. The fact that some MTAs
1493 (setq cbeg (point)) 1493 (setq cbeg (point))
1494 (skip-chars-forward " \t") 1494 (skip-chars-forward " \t")
1495 (if (bobp) 1495 (if (bobp)
1496 (delete-region (point) cbeg) 1496 (delete-region (point) cbeg)
1497 (just-one-space)))))) 1497 (just-one-space))))))
1498 1498
1499 ;; This was moved above. 1499 ;; This was moved above.
1500 ;; Fix . used as space 1500 ;; Fix . used as space
1501 ;; But it belongs here because it occurs not only as 1501 ;; But it belongs here because it occurs not only as
1502 ;; rypens@reks.uia.ac.be (Piet.Rypens) 1502 ;; rypens@reks.uia.ac.be (Piet.Rypens)
1503 ;; but also as 1503 ;; but also as
1522 (replace-match "\\1 \\2" t))))) 1522 (replace-match "\\1 \\2" t)))))
1523 1523
1524 ;; Loop over the words (and other junk) in the name. 1524 ;; Loop over the words (and other junk) in the name.
1525 (goto-char (point-min)) 1525 (goto-char (point-min))
1526 (while (not name-done-flag) 1526 (while (not name-done-flag)
1527 1527
1528 (when word-found-flag 1528 (when word-found-flag
1529 ;; Last time through this loop we skipped over a word. 1529 ;; Last time through this loop we skipped over a word.
1530 (setq last-word-beg this-word-beg) 1530 (setq last-word-beg this-word-beg)
1531 (setq drop-last-word-if-trailing-flag 1531 (setq drop-last-word-if-trailing-flag
1532 drop-this-word-if-trailing-flag) 1532 drop-this-word-if-trailing-flag)
1541 (setq drop-last-word-if-trailing-flag nil) 1541 (setq drop-last-word-if-trailing-flag nil)
1542 (setq mixed-case-flag nil) 1542 (setq mixed-case-flag nil)
1543 (setq lower-case-flag nil) 1543 (setq lower-case-flag nil)
1544 ;; (setq upper-case-flag nil) 1544 ;; (setq upper-case-flag nil)
1545 (setq begin-again-flag nil)) 1545 (setq begin-again-flag nil))
1546 1546
1547 ;; Initialize for this iteration of the loop. 1547 ;; Initialize for this iteration of the loop.
1548 (mail-extr-skip-whitespace-forward) 1548 (mail-extr-skip-whitespace-forward)
1549 (if (eq word-count 0) (narrow-to-region (point) (point-max))) 1549 (if (eq word-count 0) (narrow-to-region (point) (point-max)))
1550 (setq this-word-beg (point)) 1550 (setq this-word-beg (point))
1551 (setq drop-this-word-if-trailing-flag nil) 1551 (setq drop-this-word-if-trailing-flag nil)
1552 1552
1553 ;; Decide what to do based on what we are looking at. 1553 ;; Decide what to do based on what we are looking at.
1554 (cond 1554 (cond
1555 1555
1556 ;; Delete title 1556 ;; Delete title
1557 ((and (eq word-count 0) 1557 ((and (eq word-count 0)
1558 (looking-at mail-extr-full-name-prefixes)) 1558 (looking-at mail-extr-full-name-prefixes))
1559 (goto-char (match-end 0)) 1559 (goto-char (match-end 0))
1560 (narrow-to-region (point) (point-max))) 1560 (narrow-to-region (point) (point-max)))
1561 1561
1562 ;; Stop after name suffix 1562 ;; Stop after name suffix
1563 ((and (>= word-count 2) 1563 ((and (>= word-count 2)
1564 (looking-at mail-extr-full-name-suffix-pattern)) 1564 (looking-at mail-extr-full-name-suffix-pattern))
1565 (mail-extr-skip-whitespace-backward) 1565 (mail-extr-skip-whitespace-backward)
1566 (setq suffix-flag (point)) 1566 (setq suffix-flag (point))
1578 (insert ?.))) 1578 (insert ?.)))
1579 (t 1579 (t
1580 (upcase-word 1))) 1580 (upcase-word 1)))
1581 (setq word-found-flag t) 1581 (setq word-found-flag t)
1582 (setq name-done-flag t)) 1582 (setq name-done-flag t))
1583 1583
1584 ;; Handle SCA names 1584 ;; Handle SCA names
1585 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As" 1585 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
1586 (goto-char (match-beginning 1)) 1586 (goto-char (match-beginning 1))
1587 (narrow-to-region (point) (point-max)) 1587 (narrow-to-region (point) (point-max))
1588 (setq begin-again-flag t)) 1588 (setq begin-again-flag t))
1589 1589
1590 ;; Check for initial last name followed by comma 1590 ;; Check for initial last name followed by comma
1591 ((and (eq ?, (following-char)) 1591 ((and (eq ?, (following-char))
1592 (eq word-count 1)) 1592 (eq word-count 1))
1593 (forward-char 1) 1593 (forward-char 1)
1594 (setq last-name-comma-flag t) 1594 (setq last-name-comma-flag t)
1595 (or (eq ?\ (following-char)) 1595 (or (eq ?\ (following-char))
1596 (insert ?\ ))) 1596 (insert ?\ )))
1597 1597
1598 ;; Stop before trailing comma-separated comment 1598 ;; Stop before trailing comma-separated comment
1599 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. 1599 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1600 ;; *** This case is redundant??? 1600 ;; *** This case is redundant???
1601 ;;((eq ?, (following-char)) 1601 ;;((eq ?, (following-char))
1602 ;; (setq name-done-flag t)) 1602 ;; (setq name-done-flag t))
1603 1603
1604 ;; Delete parenthesized/quoted comment/nickname 1604 ;; Delete parenthesized/quoted comment/nickname
1605 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`)) 1605 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
1606 (setq cbeg (point)) 1606 (setq cbeg (point))
1607 (set-syntax-table mail-extr-address-text-comment-syntax-table) 1607 (set-syntax-table mail-extr-address-text-comment-syntax-table)
1608 (cond ((memq (following-char) '(?\' ?\`)) 1608 (cond ((memq (following-char) '(?\' ?\`))
1630 (setq initial (char-after (1+ cbeg))) 1630 (setq initial (char-after (1+ cbeg)))
1631 (setq initial nil)) 1631 (setq initial nil))
1632 (delete-region cbeg cend) 1632 (delete-region cbeg cend)
1633 (if initial 1633 (if initial
1634 (insert initial ". "))))) 1634 (insert initial ". ")))))
1635 1635
1636 ;; Handle *Stupid* VMS date stamps 1636 ;; Handle *Stupid* VMS date stamps
1637 ((looking-at mail-extr-stupid-vms-date-stamp-pattern) 1637 ((looking-at mail-extr-stupid-vms-date-stamp-pattern)
1638 (replace-match "" t)) 1638 (replace-match "" t))
1639 1639
1640 ;; Handle Chinese characters. 1640 ;; Handle Chinese characters.
1641 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern) 1641 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
1642 (goto-char (match-end 0)) 1642 (goto-char (match-end 0))
1643 (setq word-found-flag t)) 1643 (setq word-found-flag t))
1644 1644
1645 ;; Skip initial garbage characters. 1645 ;; Skip initial garbage characters.
1646 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. 1646 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
1647 ((and (eq word-count 0) 1647 ((and (eq word-count 0)
1648 (looking-at mail-extr-leading-garbage)) 1648 (looking-at mail-extr-leading-garbage))
1649 (goto-char (match-end 0)) 1649 (goto-char (match-end 0))
1650 ;; *** Skip backward over these??? 1650 ;; *** Skip backward over these???
1651 ;; (skip-chars-backward "& \"") 1651 ;; (skip-chars-backward "& \"")
1652 (narrow-to-region (point) (point-max))) 1652 (narrow-to-region (point) (point-max)))
1653 1653
1654 ;; Various stopping points 1654 ;; Various stopping points
1655 ((or 1655 ((or
1656 1656
1657 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case 1657 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case
1658 ;; words. Example: XT-DEM. 1658 ;; words. Example: XT-DEM.
1659 (and (>= word-count 2) 1659 (and (>= word-count 2)
1660 mixed-case-flag 1660 mixed-case-flag
1661 (looking-at mail-extr-weird-acronym-pattern) 1661 (looking-at mail-extr-weird-acronym-pattern)
1662 (not (looking-at mail-extr-roman-numeral-pattern))) 1662 (not (looking-at mail-extr-roman-numeral-pattern)))
1663 1663
1664 ;; Stop before trailing alternative address 1664 ;; Stop before trailing alternative address
1665 (looking-at mail-extr-alternative-address-pattern) 1665 (looking-at mail-extr-alternative-address-pattern)
1666 1666
1667 ;; Stop before trailing comment not introduced by comma 1667 ;; Stop before trailing comment not introduced by comma
1668 ;; THIS CASE MUST BE AFTER AN EARLIER CASE. 1668 ;; THIS CASE MUST BE AFTER AN EARLIER CASE.
1669 (looking-at mail-extr-trailing-comment-start-pattern) 1669 (looking-at mail-extr-trailing-comment-start-pattern)
1670 1670
1671 ;; Stop before telephone numbers 1671 ;; Stop before telephone numbers
1672 (and (>= word-count 1) 1672 (and (>= word-count 1)
1673 (looking-at mail-extr-telephone-extension-pattern))) 1673 (looking-at mail-extr-telephone-extension-pattern)))
1674 (setq name-done-flag t)) 1674 (setq name-done-flag t))
1675 1675
1676 ;; Delete ham radio call signs 1676 ;; Delete ham radio call signs
1677 ((looking-at mail-extr-ham-call-sign-pattern) 1677 ((looking-at mail-extr-ham-call-sign-pattern)
1678 (delete-region (match-beginning 0) (match-end 0))) 1678 (delete-region (match-beginning 0) (match-end 0)))
1679 1679
1680 ;; Fixup initials 1680 ;; Fixup initials
1681 ((looking-at mail-extr-initial-pattern) 1681 ((looking-at mail-extr-initial-pattern)
1682 (or (eq (following-char) (upcase (following-char))) 1682 (or (eq (following-char) (upcase (following-char)))
1683 (setq lower-case-flag t)) 1683 (setq lower-case-flag t))
1684 (forward-char 1) 1684 (forward-char 1)
1686 (forward-char 1) 1686 (forward-char 1)
1687 (insert ?.)) 1687 (insert ?.))
1688 (or (eq ?\ (following-char)) 1688 (or (eq ?\ (following-char))
1689 (insert ?\ )) 1689 (insert ?\ ))
1690 (setq word-found-flag t)) 1690 (setq word-found-flag t))
1691 1691
1692 ;; Handle BITNET LISTSERV list names. 1692 ;; Handle BITNET LISTSERV list names.
1693 ((and (eq word-count 0) 1693 ((and (eq word-count 0)
1694 (looking-at mail-extr-listserv-list-name-pattern)) 1694 (looking-at mail-extr-listserv-list-name-pattern))
1695 (narrow-to-region (match-beginning 1) (match-end 1)) 1695 (narrow-to-region (match-beginning 1) (match-end 1))
1696 (setq word-found-flag t) 1696 (setq word-found-flag t)
1697 (setq name-done-flag t)) 1697 (setq name-done-flag t))
1698 1698
1699 ;; Handle & substitution, when & is last and is not first. 1699 ;; Handle & substitution, when & is last and is not first.
1700 ((and (> word-count 0) 1700 ((and (> word-count 0)
1701 (eq ?\ (preceding-char)) 1701 (eq ?\ (preceding-char))
1702 (eq (following-char) ?&) 1702 (eq (following-char) ?&)
1703 (eq (1+ (point)) (point-max))) 1703 (eq (1+ (point)) (point-max)))
1720 1720
1721 ;; Regular name words 1721 ;; Regular name words
1722 ((looking-at mail-extr-name-pattern) 1722 ((looking-at mail-extr-name-pattern)
1723 (setq name-beg (point)) 1723 (setq name-beg (point))
1724 (setq name-end (match-end 0)) 1724 (setq name-end (match-end 0))
1725 1725
1726 ;; Certain words will be dropped if they are at the end. 1726 ;; Certain words will be dropped if they are at the end.
1727 (and (>= word-count 2) 1727 (and (>= word-count 2)
1728 (not lower-case-flag) 1728 (not lower-case-flag)
1729 (or 1729 (or
1730 ;; A trailing 4-or-more letter lowercase words preceded by 1730 ;; A trailing 4-or-more letter lowercase words preceded by
1731 ;; mixed case or uppercase words will be dropped. 1731 ;; mixed case or uppercase words will be dropped.
1732 (looking-at "[a-z][a-z][a-z][a-z]+[ \t]*\\'") 1732 (looking-at "[a-z][a-z][a-z][a-z]+[ \t]*\\'")
1733 ;; Drop a trailing word which is terminated with a period. 1733 ;; Drop a trailing word which is terminated with a period.
1734 (eq ?. (char-after (1- name-end)))) 1734 (eq ?. (char-after (1- name-end))))
1735 (setq drop-this-word-if-trailing-flag t)) 1735 (setq drop-this-word-if-trailing-flag t))
1736 1736
1737 ;; Set the flags that indicate whether we have seen a lowercase 1737 ;; Set the flags that indicate whether we have seen a lowercase
1738 ;; word, a mixed case word, and an uppercase word. 1738 ;; word, a mixed case word, and an uppercase word.
1739 (if (re-search-forward "[a-z]" name-end t) 1739 (if (re-search-forward "[a-z]" name-end t)
1740 (if (progn 1740 (if (progn
1741 (goto-char name-beg) 1741 (goto-char name-beg)
1742 (re-search-forward "[A-Z]" name-end t)) 1742 (re-search-forward "[A-Z]" name-end t))
1743 (setq mixed-case-flag t) 1743 (setq mixed-case-flag t)
1744 (setq lower-case-flag t)) 1744 (setq lower-case-flag t))
1745 ;; (setq upper-case-flag t) 1745 ;; (setq upper-case-flag t)
1746 ) 1746 )
1747 1747
1748 (goto-char name-end) 1748 (goto-char name-end)
1749 (setq word-found-flag t)) 1749 (setq word-found-flag t))
1750 1750
1751 ;; Allow a number as a word, if it doesn't mean anything else. 1751 ;; Allow a number as a word, if it doesn't mean anything else.
1752 ((looking-at "[0-9]+\\>") 1752 ((looking-at "[0-9]+\\>")
1756 (setq word-found-flag t)) 1756 (setq word-found-flag t))
1757 1757
1758 (t 1758 (t
1759 (setq name-done-flag t) 1759 (setq name-done-flag t)
1760 )) 1760 ))
1761 1761
1762 ;; Count any word that we skipped over. 1762 ;; Count any word that we skipped over.
1763 (if word-found-flag 1763 (if word-found-flag
1764 (setq word-count (1+ word-count)))) 1764 (setq word-count (1+ word-count))))
1765 1765
1766 ;; If the last thing in the name is 2 or more periods, or one or more 1766 ;; If the last thing in the name is 2 or more periods, or one or more
1767 ;; other sentence terminators (but not a single period) then keep them 1767 ;; other sentence terminators (but not a single period) then keep them
1768 ;; and the preceding word. This is for the benefit of whole sentences 1768 ;; and the preceding word. This is for the benefit of whole sentences
1769 ;; in the name field: it's better behavior than dropping the last word 1769 ;; in the name field: it's better behavior than dropping the last word
1770 ;; of the sentence... 1770 ;; of the sentence...
1775 ;; Drop everything after point and certain trailing words. 1775 ;; Drop everything after point and certain trailing words.
1776 (narrow-to-region (point-min) 1776 (narrow-to-region (point-min)
1777 (or (and drop-last-word-if-trailing-flag 1777 (or (and drop-last-word-if-trailing-flag
1778 last-word-beg) 1778 last-word-beg)
1779 (point))) 1779 (point)))
1780 1780
1781 ;; Xerox's mailers SUCK!!!!!! 1781 ;; Xerox's mailers SUCK!!!!!!
1782 ;; We simply refuse to believe that any last name is PARC or ADOC. 1782 ;; We simply refuse to believe that any last name is PARC or ADOC.
1783 ;; If it looks like that is the last name, that there is no meaningful 1783 ;; If it looks like that is the last name, that there is no meaningful
1784 ;; here at all. Actually I guess it would be best to map patterns 1784 ;; here at all. Actually I guess it would be best to map patterns
1785 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't 1785 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't
1800 (insert ?\ )) 1800 (insert ?\ ))
1801 (insert-buffer-substring (current-buffer) (point-min) name-end) 1801 (insert-buffer-substring (current-buffer) (point-min) name-end)
1802 (goto-char name-end) 1802 (goto-char name-end)
1803 (skip-chars-forward "\t ,") 1803 (skip-chars-forward "\t ,")
1804 (narrow-to-region (point) (point-max))) 1804 (narrow-to-region (point) (point-max)))
1805 1805
1806 ;; Delete leading and trailing junk characters. 1806 ;; Delete leading and trailing junk characters.
1807 ;; *** This is probably completely unneeded now. 1807 ;; *** This is probably completely unneeded now.
1808 ;;(goto-char (point-max)) 1808 ;;(goto-char (point-max))
1809 ;;(skip-chars-backward mail-extr-non-end-name-chars) 1809 ;;(skip-chars-backward mail-extr-non-end-name-chars)
1810 ;;(if (eq ?. (following-char)) 1810 ;;(if (eq ?. (following-char))
1812 ;;(narrow-to-region (point) 1812 ;;(narrow-to-region (point)
1813 ;; (progn 1813 ;; (progn
1814 ;; (goto-char (point-min)) 1814 ;; (goto-char (point-min))
1815 ;; (skip-chars-forward mail-extr-non-begin-name-chars) 1815 ;; (skip-chars-forward mail-extr-non-begin-name-chars)
1816 ;; (point))) 1816 ;; (point)))
1817 1817
1818 ;; Compress whitespace 1818 ;; Compress whitespace
1819 (goto-char (point-min)) 1819 (goto-char (point-min))
1820 (while (re-search-forward "[ \t\n]+" nil t) 1820 (while (re-search-forward "[ \t\n]+" nil t)
1821 (replace-match (if (eobp) "" " ") t)) 1821 (replace-match (if (eobp) "" " ") t))
1822 ))) 1822 )))
2130 (message "%s: %s" (upcase (symbol-name domain)) (get domain 'domain-name))) 2130 (message "%s: %s" (upcase (symbol-name domain)) (get domain 'domain-name)))
2131 2131
2132 2132
2133 ;(let ((all nil)) 2133 ;(let ((all nil))
2134 ; (mapatoms #'(lambda (x) 2134 ; (mapatoms #'(lambda (x)
2135 ; (if (and (boundp x) 2135 ; (if (and (boundp x)
2136 ; (string-match "^mail-extr-" (symbol-name x))) 2136 ; (string-match "^mail-extr-" (symbol-name x)))
2137 ; (setq all (cons x all))))) 2137 ; (setq all (cons x all)))))
2138 ; (setq all (sort all #'string-lessp)) 2138 ; (setq all (sort all #'string-lessp))
2139 ; (cons 'setq 2139 ; (cons 'setq
2140 ; (apply 'nconc (mapcar #'(lambda (x) 2140 ; (apply 'nconc (mapcar #'(lambda (x)