Mercurial > emacs
comparison lisp/mail/mail-extr.el @ 47939:5f47d61ffbdc
Fix typo in comment.
author | Juanma Barranquero <lekktu@gmail.com> |
---|---|
date | Fri, 18 Oct 2002 08:52:37 +0000 |
parents | a298684644ca |
children | a4bb96832880 d7ddb3e565de |
comparison
equal
deleted
inserted
replaced
47938:65eaf21a0a44 | 47939:5f47d61ffbdc |
---|---|
27 ;;; Commentary: | 27 ;;; Commentary: |
28 | 28 |
29 ;; The entry point of this code is | 29 ;; The entry point of this code is |
30 ;; | 30 ;; |
31 ;; mail-extract-address-components: (address &optional all) | 31 ;; mail-extract-address-components: (address &optional all) |
32 ;; | 32 ;; |
33 ;; Given an RFC-822 ADDRESS, extract full name and canonical address. | 33 ;; Given an RFC-822 ADDRESS, extract full name and canonical address. |
34 ;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). | 34 ;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). |
35 ;; If no name can be extracted, FULL-NAME will be nil. | 35 ;; If no name can be extracted, FULL-NAME will be nil. |
36 ;; ADDRESS may be a string or a buffer. If it is a buffer, the visible | 36 ;; ADDRESS may be a string or a buffer. If it is a buffer, the visible |
37 ;; (narrowed) portion of the buffer will be interpreted as the address. | 37 ;; (narrowed) portion of the buffer will be interpreted as the address. |
38 ;; (This feature exists so that the clever caller might be able to avoid | 38 ;; (This feature exists so that the clever caller might be able to avoid |
39 ;; consing a string.) | 39 ;; consing a string.) |
40 ;; If ADDRESS contains more than one RFC-822 address, only the first is | 40 ;; If ADDRESS contains more than one RFC-822 address, only the first is |
41 ;; returned. | 41 ;; returned. |
59 ;; We have an extensive test-case collection of funny addresses if you want to | 59 ;; We have an extensive test-case collection of funny addresses if you want to |
60 ;; work with the code. Developing this code requires frequent testing to | 60 ;; work with the code. Developing this code requires frequent testing to |
61 ;; make sure you're not breaking functionality. The test cases aren't included | 61 ;; make sure you're not breaking functionality. The test cases aren't included |
62 ;; because they are over 100K. | 62 ;; because they are over 100K. |
63 ;; | 63 ;; |
64 ;; If you find an address that mail-extr fails on, please send it to the | 64 ;; If you find an address that mail-extr fails on, please send it to the |
65 ;; maintainer along with what you think the correct results should be. We do | 65 ;; maintainer along with what you think the correct results should be. We do |
66 ;; not consider it a bug if mail-extr mangles a comment that does not | 66 ;; not consider it a bug if mail-extr mangles a comment that does not |
67 ;; correspond to a real human full name, although we would prefer that | 67 ;; correspond to a real human full name, although we would prefer that |
68 ;; mail-extr would return the comment as-is. | 68 ;; mail-extr would return the comment as-is. |
69 ;; | 69 ;; |
70 ;; Features: | 70 ;; Features: |
71 ;; | 71 ;; |
72 ;; * Full name handling: | 72 ;; * Full name handling: |
119 ;; * arrange for testing with different relative precedences of ! vs. @ | 119 ;; * arrange for testing with different relative precedences of ! vs. @ |
120 ;; and %. | 120 ;; and %. |
121 ;; * insert documentation strings! | 121 ;; * insert documentation strings! |
122 ;; * handle X.400-gatewayed addresses according to RFC 1148. | 122 ;; * handle X.400-gatewayed addresses according to RFC 1148. |
123 | 123 |
124 ;;; Change Log: | 124 ;;; Change Log: |
125 ;; | 125 ;; |
126 ;; Thu Feb 17 17:57:33 1994 Jamie Zawinski (jwz@lucid.com) | 126 ;; Thu Feb 17 17:57:33 1994 Jamie Zawinski (jwz@lucid.com) |
127 ;; | 127 ;; |
128 ;; * merged with jbw's latest version | 128 ;; * merged with jbw's latest version |
129 ;; | 129 ;; |
130 ;; Wed Feb 9 21:56:27 1994 Jamie Zawinski (jwz@lucid.com) | 130 ;; Wed Feb 9 21:56:27 1994 Jamie Zawinski (jwz@lucid.com) |
138 ;; Thu Dec 16 21:56:45 1993 Jamie Zawinski (jwz@lucid.com) | 138 ;; Thu Dec 16 21:56:45 1993 Jamie Zawinski (jwz@lucid.com) |
139 ;; | 139 ;; |
140 ;; * some more cleanup, doc, added provide | 140 ;; * some more cleanup, doc, added provide |
141 ;; | 141 ;; |
142 ;; Tue Mar 23 21:23:18 1993 Joe Wells (jbw at csd.bu.edu) | 142 ;; Tue Mar 23 21:23:18 1993 Joe Wells (jbw at csd.bu.edu) |
143 ;; | 143 ;; |
144 ;; * Made mail-full-name-prefixes a user-customizable variable. | 144 ;; * Made mail-full-name-prefixes a user-customizable variable. |
145 ;; Allow passing the address as a buffer as well as a string. | 145 ;; Allow passing the address as a buffer as well as a string. |
146 ;; Allow [ and ] as name characters (Finnish character set). | 146 ;; Allow [ and ] as name characters (Finnish character set). |
147 ;; | 147 ;; |
148 ;; Mon Mar 22 21:20:56 1993 Joe Wells (jbw at bigbird.bu.edu) | 148 ;; Mon Mar 22 21:20:56 1993 Joe Wells (jbw at bigbird.bu.edu) |
149 ;; | 149 ;; |
150 ;; * Handle "null" addresses. Handle = used for spacing in mailbox | 150 ;; * Handle "null" addresses. Handle = used for spacing in mailbox |
151 ;; name. Fix bug in handling of ROUTE-ADDR-type addresses that are | 151 ;; name. Fix bug in handling of ROUTE-ADDR-type addresses that are |
152 ;; missing their brackets. Handle uppercase "JR". Extract full | 152 ;; missing their brackets. Handle uppercase "JR". Extract full |
153 ;; names from X.400 addresses encoded in RFC-822. Fix bug in | 153 ;; names from X.400 addresses encoded in RFC-822. Fix bug in |
154 ;; handling of multiple addresses where first has trailing comment. | 154 ;; handling of multiple addresses where first has trailing comment. |
155 ;; Handle more kinds of telephone extension lead-ins. | 155 ;; Handle more kinds of telephone extension lead-ins. |
156 ;; | 156 ;; |
157 ;; Mon Mar 22 20:16:57 1993 Joe Wells (jbw at bigbird.bu.edu) | 157 ;; Mon Mar 22 20:16:57 1993 Joe Wells (jbw at bigbird.bu.edu) |
158 ;; | 158 ;; |
159 ;; * Handle HZ encoding for embedding GB encoded chinese characters. | 159 ;; * Handle HZ encoding for embedding GB encoded chinese characters. |
160 ;; | 160 ;; |
161 ;; Mon Mar 22 00:46:12 1993 Joe Wells (jbw at bigbird.bu.edu) | 161 ;; Mon Mar 22 00:46:12 1993 Joe Wells (jbw at bigbird.bu.edu) |
162 ;; | 162 ;; |
163 ;; * Fixed too broad matching of ham radio call signs. Fixed bug in | 163 ;; * Fixed too broad matching of ham radio call signs. Fixed bug in |
164 ;; handling an unmatched ' in a name string. Enhanced recognition | 164 ;; handling an unmatched ' in a name string. Enhanced recognition |
165 ;; of when . in the mailbox name terminates the name portion. | 165 ;; of when . in the mailbox name terminates the name portion. |
166 ;; Narrowed conversion of . to space to only the necessary | 166 ;; Narrowed conversion of . to space to only the necessary |
167 ;; situation. Deal with VMS's stupid date stamps. Handle a unique | 167 ;; situation. Deal with VMS's stupid date stamps. Handle a unique |
168 ;; way of introducing an alternate address. Fixed spacing bug I | 168 ;; way of introducing an alternate address. Fixed spacing bug I |
169 ;; introduced in switching last name order. Fixed bug in handling | 169 ;; introduced in switching last name order. Fixed bug in handling |
170 ;; address with ! and % but no @. Narrowed the cases in which | 170 ;; address with ! and % but no @. Narrowed the cases in which |
171 ;; certain trailing words are discarded. | 171 ;; certain trailing words are discarded. |
172 ;; | 172 ;; |
173 ;; Sun Mar 21 21:41:06 1993 Joe Wells (jbw at bigbird.bu.edu) | 173 ;; Sun Mar 21 21:41:06 1993 Joe Wells (jbw at bigbird.bu.edu) |
174 ;; | 174 ;; |
175 ;; * Fixed bugs in handling GROUP addresses. Certain words in the | 175 ;; * Fixed bugs in handling GROUP addresses. Certain words in the |
176 ;; middle of a name no longer terminate it. Handle LISTSERV list | 176 ;; middle of a name no longer terminate it. Handle LISTSERV list |
177 ;; names. Ignore comment field containing mailbox name. | 177 ;; names. Ignore comment field containing mailbox name. |
178 ;; | 178 ;; |
179 ;; Sun Mar 21 14:39:38 1993 Joe Wells (jbw at bigbird.bu.edu) | 179 ;; Sun Mar 21 14:39:38 1993 Joe Wells (jbw at bigbird.bu.edu) |
180 ;; | 180 ;; |
181 ;; * Moved variant-method code back into main function. Handle | 181 ;; * Moved variant-method code back into main function. Handle |
182 ;; underscores as spaces in comments. Handle leading nickname. Add | 182 ;; underscores as spaces in comments. Handle leading nickname. Add |
183 ;; flag to ignore single-word names. Other changes. | 183 ;; flag to ignore single-word names. Other changes. |
184 ;; | 184 ;; |
185 ;; Mon Feb 1 22:23:31 1993 Joe Wells (jbw at bigbird.bu.edu) | 185 ;; Mon Feb 1 22:23:31 1993 Joe Wells (jbw at bigbird.bu.edu) |
186 ;; | 186 ;; |
187 ;; * Added in changes by Rod Whitby and Jamie Zawinski. This | 187 ;; * Added in changes by Rod Whitby and Jamie Zawinski. This |
188 ;; includes the flag mail-extr-guess-middle-initial and the fix for | 188 ;; includes the flag mail-extr-guess-middle-initial and the fix for |
189 ;; handling multiple addresses correctly. (Whitby just changed | 189 ;; handling multiple addresses correctly. (Whitby just changed |
190 ;; a > to a <.) | 190 ;; a > to a <.) |
191 ;; | 191 ;; |
192 ;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu) | 192 ;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu) |
193 ;; | 193 ;; |
194 ;; * Cleaned up some more. Release version 1.0 to world. | 194 ;; * Cleaned up some more. Release version 1.0 to world. |
195 ;; | 195 ;; |
196 ;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu) | 196 ;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu) |
197 ;; | 197 ;; |
198 ;; * Cleaned up full name extraction extensively. | 198 ;; * Cleaned up full name extraction extensively. |
199 ;; | 199 ;; |
200 ;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu) | 200 ;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu) |
201 ;; | 201 ;; |
202 ;; * Total rewrite. Integrated mail-canonicalize-address into | 202 ;; * Total rewrite. Integrated mail-canonicalize-address into |
203 ;; mail-extract-address-components. Now handles GROUP addresses more | 203 ;; mail-extract-address-components. Now handles GROUP addresses more |
204 ;; or less correctly. Better handling of lots of different cases. | 204 ;; or less correctly. Better handling of lots of different cases. |
205 ;; | 205 ;; |
206 ;; Fri Jun 14 19:39:50 1991 | 206 ;; Fri Jun 14 19:39:50 1991 |
207 ;; * Created. | 207 ;; * Created. |
208 | 208 |
209 ;;; Code: | 209 ;;; Code: |
210 | 210 |
316 ;; Keep this set as minimal as possible. | 316 ;; Keep this set as minimal as possible. |
317 (defconst mail-extr-last-letters (purecopy "[:alpha:]`'.")) | 317 (defconst mail-extr-last-letters (purecopy "[:alpha:]`'.")) |
318 | 318 |
319 (defconst mail-extr-leading-garbage "\\W+") | 319 (defconst mail-extr-leading-garbage "\\W+") |
320 | 320 |
321 ;; (defconst mail-extr-non-name-chars | 321 ;; (defconst mail-extr-non-name-chars |
322 ;; (purecopy (concat "^" mail-extr-all-letters "."))) | 322 ;; (purecopy (concat "^" mail-extr-all-letters "."))) |
323 ;; (defconst mail-extr-non-begin-name-chars | 323 ;; (defconst mail-extr-non-begin-name-chars |
324 ;; (purecopy (concat "^" mail-extr-first-letters))) | 324 ;; (purecopy (concat "^" mail-extr-first-letters))) |
325 ;; (defconst mail-extr-non-end-name-chars | 325 ;; (defconst mail-extr-non-end-name-chars |
326 ;; (purecopy (concat "^" mail-extr-last-letters))) | 326 ;; (purecopy (concat "^" mail-extr-last-letters))) |
327 | 327 |
328 ;; Matches an initial not followed by both a period and a space. | 328 ;; Matches an initial not followed by both a period and a space. |
329 ;; (defconst mail-extr-bad-initials-pattern | 329 ;; (defconst mail-extr-bad-initials-pattern |
330 ;; (purecopy | 330 ;; (purecopy |
331 ;; (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)" | 331 ;; (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)" |
332 ;; mail-extr-all-letters mail-extr-first-letters mail-extr-all-letters))) | 332 ;; mail-extr-all-letters mail-extr-first-letters mail-extr-all-letters))) |
333 | 333 |
334 ;; Matches periods used instead of spaces. Must not match the period | 334 ;; Matches periods used instead of spaces. Must not match the period |
335 ;; following an initial. | 335 ;; following an initial. |
361 | 361 |
362 ;; Matches a trailing uppercase (with other characters possible) acronym. | 362 ;; Matches a trailing uppercase (with other characters possible) acronym. |
363 ;; Must not match a trailing uppercase last name or trailing initial | 363 ;; Must not match a trailing uppercase last name or trailing initial |
364 (defconst mail-extr-weird-acronym-pattern | 364 (defconst mail-extr-weird-acronym-pattern |
365 (purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)")) | 365 (purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)")) |
366 | 366 |
367 ;; Matches a mixed-case or lowercase name (not an initial). | 367 ;; Matches a mixed-case or lowercase name (not an initial). |
368 ;; #### Match Latin1 lower case letters here too? | 368 ;; #### Match Latin1 lower case letters here too? |
369 ;; (defconst mail-extr-mixed-case-name-pattern | 369 ;; (defconst mail-extr-mixed-case-name-pattern |
370 ;; (purecopy | 370 ;; (purecopy |
371 ;; (format | 371 ;; (format |
374 ;; mail-extr-first-letters mail-extr-all-letters mail-extr-all-letters | 374 ;; mail-extr-first-letters mail-extr-all-letters mail-extr-all-letters |
375 ;; mail-extr-last-letters mail-extr-first-letters mail-extr-all-letters))) | 375 ;; mail-extr-last-letters mail-extr-first-letters mail-extr-all-letters))) |
376 | 376 |
377 ;; Matches a trailing alternative address. | 377 ;; Matches a trailing alternative address. |
378 ;; #### Match Latin1 letters here too? | 378 ;; #### Match Latin1 letters here too? |
379 ;; #### Match _ before @ here too? | 379 ;; #### Match _ before @ here too? |
380 (defconst mail-extr-alternative-address-pattern | 380 (defconst mail-extr-alternative-address-pattern |
381 (purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]")) | 381 (purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]")) |
382 | 382 |
383 ;; Matches a variety of trailing comments not including comma-delimited | 383 ;; Matches a variety of trailing comments not including comma-delimited |
384 ;; comments. | 384 ;; comments. |
433 mail-extr-last-letters))) | 433 mail-extr-last-letters))) |
434 | 434 |
435 ;; Matches a single word name. | 435 ;; Matches a single word name. |
436 ;; (defconst mail-extr-one-name-pattern | 436 ;; (defconst mail-extr-one-name-pattern |
437 ;; (purecopy (concat "\\`" mail-extr-normal-name-pattern "\\'"))) | 437 ;; (purecopy (concat "\\`" mail-extr-normal-name-pattern "\\'"))) |
438 | 438 |
439 ;; Matches normal two names with missing middle initial | 439 ;; Matches normal two names with missing middle initial |
440 ;; The first name is not allowed to have a hyphen because this can cause | 440 ;; The first name is not allowed to have a hyphen because this can cause |
441 ;; false matches where the "middle initial" is actually the first letter | 441 ;; false matches where the "middle initial" is actually the first letter |
442 ;; of the second part of the first name. | 442 ;; of the second part of the first name. |
443 (defconst mail-extr-two-name-pattern | 443 (defconst mail-extr-two-name-pattern |
457 ;; | 457 ;; |
458 ;; In ASCII mode, a byte is interpreted as an ASCII character, unless a '~' is | 458 ;; In ASCII mode, a byte is interpreted as an ASCII character, unless a '~' is |
459 ;; encountered. The character '~' is an escape character. By convention, it | 459 ;; encountered. The character '~' is an escape character. By convention, it |
460 ;; must be immediately followed ONLY by '~', '{' or '\n' (<LF>), with the | 460 ;; must be immediately followed ONLY by '~', '{' or '\n' (<LF>), with the |
461 ;; following special meaning. | 461 ;; following special meaning. |
462 ;; | 462 ;; |
463 ;; o The escape sequence '~~' is interpreted as a '~'. | 463 ;; o The escape sequence '~~' is interpreted as a '~'. |
464 ;; o The escape-to-GB sequence '~{' switches the mode from ASCII to GB. | 464 ;; o The escape-to-GB sequence '~{' switches the mode from ASCII to GB. |
465 ;; o The escape sequence '~\n' is a line-continuation marker to be consumed | 465 ;; o The escape sequence '~\n' is a line-continuation marker to be consumed |
466 ;; with no output produced. | 466 ;; with no output produced. |
467 ;; | 467 ;; |
468 ;; In GB mode, characters are interpreted two bytes at a time as (pure) GB | 468 ;; In GB mode, characters are interpreted two bytes at a time as (pure) GB |
469 ;; codes until the escape-from-GB code '~}' is read. This code switches the | 469 ;; codes until the escape-from-GB code '~}' is read. This code switches the |
470 ;; mode from GB back to ASCII. (Note that the escape-from-GB code '~}' | 470 ;; mode from GB back to ASCII. (Note that the escape-from-GB code '~}' |
471 ;; ($7E7D) is outside the defined GB range.) | 471 ;; ($7E7D) is outside the defined GB range.) |
472 (defconst mail-extr-hz-embedded-gb-encoded-chinese-pattern | 472 (defconst mail-extr-hz-embedded-gb-encoded-chinese-pattern |
732 (buffer-disable-undo extraction-buffer) | 732 (buffer-disable-undo extraction-buffer) |
733 (set-syntax-table mail-extr-address-syntax-table) | 733 (set-syntax-table mail-extr-address-syntax-table) |
734 (widen) | 734 (widen) |
735 (erase-buffer) | 735 (erase-buffer) |
736 (setq case-fold-search nil) | 736 (setq case-fold-search nil) |
737 | 737 |
738 ;; Insert extra space at beginning to allow later replacement with < | 738 ;; Insert extra space at beginning to allow later replacement with < |
739 ;; without having to move markers. | 739 ;; without having to move markers. |
740 (insert ?\ ) | 740 (insert ?\ ) |
741 | 741 |
742 ;; Insert the address itself. | 742 ;; Insert the address itself. |
752 (with-current-buffer (get-buffer-create canonicalization-buffer) | 752 (with-current-buffer (get-buffer-create canonicalization-buffer) |
753 (fundamental-mode) | 753 (fundamental-mode) |
754 (buffer-disable-undo canonicalization-buffer) | 754 (buffer-disable-undo canonicalization-buffer) |
755 (setq case-fold-search nil)) | 755 (setq case-fold-search nil)) |
756 | 756 |
757 | 757 |
758 ;; Unfold multiple lines. | 758 ;; Unfold multiple lines. |
759 (goto-char (point-min)) | 759 (goto-char (point-min)) |
760 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) | 760 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) |
761 (replace-match "\\1 " t)) | 761 (replace-match "\\1 " t)) |
762 | 762 |
763 ;; Loop over addresses until we have as many as we want. | 763 ;; Loop over addresses until we have as many as we want. |
764 (while (and (or all (null value-list)) | 764 (while (and (or all (null value-list)) |
765 (progn (goto-char (point-min)) | 765 (progn (goto-char (point-min)) |
766 (skip-chars-forward " \t") | 766 (skip-chars-forward " \t") |
767 (not (eobp)))) | 767 (not (eobp)))) |
1010 ;; *** are hardly ever seen. | 1010 ;; *** are hardly ever seen. |
1011 ) | 1011 ) |
1012 | 1012 |
1013 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any | 1013 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any |
1014 ;; others. | 1014 ;; others. |
1015 ;; Hell, go ahead an nuke all of the commas. | 1015 ;; Hell, go ahead and nuke all of the commas. |
1016 ;; **** This will cause problems when we start handling commas in | 1016 ;; **** This will cause problems when we start handling commas in |
1017 ;; the PHRASE part .... no it won't ... yes it will ... ????? | 1017 ;; the PHRASE part .... no it won't ... yes it will ... ????? |
1018 (mail-extr-nuke-outside-range comma-pos 1 1) | 1018 (mail-extr-nuke-outside-range comma-pos 1 1) |
1019 | 1019 |
1020 ;; can only have multiple @s inside < >. The fact that some MTAs | 1020 ;; can only have multiple @s inside < >. The fact that some MTAs |
1493 (setq cbeg (point)) | 1493 (setq cbeg (point)) |
1494 (skip-chars-forward " \t") | 1494 (skip-chars-forward " \t") |
1495 (if (bobp) | 1495 (if (bobp) |
1496 (delete-region (point) cbeg) | 1496 (delete-region (point) cbeg) |
1497 (just-one-space)))))) | 1497 (just-one-space)))))) |
1498 | 1498 |
1499 ;; This was moved above. | 1499 ;; This was moved above. |
1500 ;; Fix . used as space | 1500 ;; Fix . used as space |
1501 ;; But it belongs here because it occurs not only as | 1501 ;; But it belongs here because it occurs not only as |
1502 ;; rypens@reks.uia.ac.be (Piet.Rypens) | 1502 ;; rypens@reks.uia.ac.be (Piet.Rypens) |
1503 ;; but also as | 1503 ;; but also as |
1522 (replace-match "\\1 \\2" t))))) | 1522 (replace-match "\\1 \\2" t))))) |
1523 | 1523 |
1524 ;; Loop over the words (and other junk) in the name. | 1524 ;; Loop over the words (and other junk) in the name. |
1525 (goto-char (point-min)) | 1525 (goto-char (point-min)) |
1526 (while (not name-done-flag) | 1526 (while (not name-done-flag) |
1527 | 1527 |
1528 (when word-found-flag | 1528 (when word-found-flag |
1529 ;; Last time through this loop we skipped over a word. | 1529 ;; Last time through this loop we skipped over a word. |
1530 (setq last-word-beg this-word-beg) | 1530 (setq last-word-beg this-word-beg) |
1531 (setq drop-last-word-if-trailing-flag | 1531 (setq drop-last-word-if-trailing-flag |
1532 drop-this-word-if-trailing-flag) | 1532 drop-this-word-if-trailing-flag) |
1541 (setq drop-last-word-if-trailing-flag nil) | 1541 (setq drop-last-word-if-trailing-flag nil) |
1542 (setq mixed-case-flag nil) | 1542 (setq mixed-case-flag nil) |
1543 (setq lower-case-flag nil) | 1543 (setq lower-case-flag nil) |
1544 ;; (setq upper-case-flag nil) | 1544 ;; (setq upper-case-flag nil) |
1545 (setq begin-again-flag nil)) | 1545 (setq begin-again-flag nil)) |
1546 | 1546 |
1547 ;; Initialize for this iteration of the loop. | 1547 ;; Initialize for this iteration of the loop. |
1548 (mail-extr-skip-whitespace-forward) | 1548 (mail-extr-skip-whitespace-forward) |
1549 (if (eq word-count 0) (narrow-to-region (point) (point-max))) | 1549 (if (eq word-count 0) (narrow-to-region (point) (point-max))) |
1550 (setq this-word-beg (point)) | 1550 (setq this-word-beg (point)) |
1551 (setq drop-this-word-if-trailing-flag nil) | 1551 (setq drop-this-word-if-trailing-flag nil) |
1552 | 1552 |
1553 ;; Decide what to do based on what we are looking at. | 1553 ;; Decide what to do based on what we are looking at. |
1554 (cond | 1554 (cond |
1555 | 1555 |
1556 ;; Delete title | 1556 ;; Delete title |
1557 ((and (eq word-count 0) | 1557 ((and (eq word-count 0) |
1558 (looking-at mail-extr-full-name-prefixes)) | 1558 (looking-at mail-extr-full-name-prefixes)) |
1559 (goto-char (match-end 0)) | 1559 (goto-char (match-end 0)) |
1560 (narrow-to-region (point) (point-max))) | 1560 (narrow-to-region (point) (point-max))) |
1561 | 1561 |
1562 ;; Stop after name suffix | 1562 ;; Stop after name suffix |
1563 ((and (>= word-count 2) | 1563 ((and (>= word-count 2) |
1564 (looking-at mail-extr-full-name-suffix-pattern)) | 1564 (looking-at mail-extr-full-name-suffix-pattern)) |
1565 (mail-extr-skip-whitespace-backward) | 1565 (mail-extr-skip-whitespace-backward) |
1566 (setq suffix-flag (point)) | 1566 (setq suffix-flag (point)) |
1578 (insert ?.))) | 1578 (insert ?.))) |
1579 (t | 1579 (t |
1580 (upcase-word 1))) | 1580 (upcase-word 1))) |
1581 (setq word-found-flag t) | 1581 (setq word-found-flag t) |
1582 (setq name-done-flag t)) | 1582 (setq name-done-flag t)) |
1583 | 1583 |
1584 ;; Handle SCA names | 1584 ;; Handle SCA names |
1585 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As" | 1585 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As" |
1586 (goto-char (match-beginning 1)) | 1586 (goto-char (match-beginning 1)) |
1587 (narrow-to-region (point) (point-max)) | 1587 (narrow-to-region (point) (point-max)) |
1588 (setq begin-again-flag t)) | 1588 (setq begin-again-flag t)) |
1589 | 1589 |
1590 ;; Check for initial last name followed by comma | 1590 ;; Check for initial last name followed by comma |
1591 ((and (eq ?, (following-char)) | 1591 ((and (eq ?, (following-char)) |
1592 (eq word-count 1)) | 1592 (eq word-count 1)) |
1593 (forward-char 1) | 1593 (forward-char 1) |
1594 (setq last-name-comma-flag t) | 1594 (setq last-name-comma-flag t) |
1595 (or (eq ?\ (following-char)) | 1595 (or (eq ?\ (following-char)) |
1596 (insert ?\ ))) | 1596 (insert ?\ ))) |
1597 | 1597 |
1598 ;; Stop before trailing comma-separated comment | 1598 ;; Stop before trailing comma-separated comment |
1599 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. | 1599 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. |
1600 ;; *** This case is redundant??? | 1600 ;; *** This case is redundant??? |
1601 ;;((eq ?, (following-char)) | 1601 ;;((eq ?, (following-char)) |
1602 ;; (setq name-done-flag t)) | 1602 ;; (setq name-done-flag t)) |
1603 | 1603 |
1604 ;; Delete parenthesized/quoted comment/nickname | 1604 ;; Delete parenthesized/quoted comment/nickname |
1605 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`)) | 1605 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`)) |
1606 (setq cbeg (point)) | 1606 (setq cbeg (point)) |
1607 (set-syntax-table mail-extr-address-text-comment-syntax-table) | 1607 (set-syntax-table mail-extr-address-text-comment-syntax-table) |
1608 (cond ((memq (following-char) '(?\' ?\`)) | 1608 (cond ((memq (following-char) '(?\' ?\`)) |
1630 (setq initial (char-after (1+ cbeg))) | 1630 (setq initial (char-after (1+ cbeg))) |
1631 (setq initial nil)) | 1631 (setq initial nil)) |
1632 (delete-region cbeg cend) | 1632 (delete-region cbeg cend) |
1633 (if initial | 1633 (if initial |
1634 (insert initial ". "))))) | 1634 (insert initial ". "))))) |
1635 | 1635 |
1636 ;; Handle *Stupid* VMS date stamps | 1636 ;; Handle *Stupid* VMS date stamps |
1637 ((looking-at mail-extr-stupid-vms-date-stamp-pattern) | 1637 ((looking-at mail-extr-stupid-vms-date-stamp-pattern) |
1638 (replace-match "" t)) | 1638 (replace-match "" t)) |
1639 | 1639 |
1640 ;; Handle Chinese characters. | 1640 ;; Handle Chinese characters. |
1641 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern) | 1641 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern) |
1642 (goto-char (match-end 0)) | 1642 (goto-char (match-end 0)) |
1643 (setq word-found-flag t)) | 1643 (setq word-found-flag t)) |
1644 | 1644 |
1645 ;; Skip initial garbage characters. | 1645 ;; Skip initial garbage characters. |
1646 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. | 1646 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. |
1647 ((and (eq word-count 0) | 1647 ((and (eq word-count 0) |
1648 (looking-at mail-extr-leading-garbage)) | 1648 (looking-at mail-extr-leading-garbage)) |
1649 (goto-char (match-end 0)) | 1649 (goto-char (match-end 0)) |
1650 ;; *** Skip backward over these??? | 1650 ;; *** Skip backward over these??? |
1651 ;; (skip-chars-backward "& \"") | 1651 ;; (skip-chars-backward "& \"") |
1652 (narrow-to-region (point) (point-max))) | 1652 (narrow-to-region (point) (point-max))) |
1653 | 1653 |
1654 ;; Various stopping points | 1654 ;; Various stopping points |
1655 ((or | 1655 ((or |
1656 | 1656 |
1657 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case | 1657 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case |
1658 ;; words. Example: XT-DEM. | 1658 ;; words. Example: XT-DEM. |
1659 (and (>= word-count 2) | 1659 (and (>= word-count 2) |
1660 mixed-case-flag | 1660 mixed-case-flag |
1661 (looking-at mail-extr-weird-acronym-pattern) | 1661 (looking-at mail-extr-weird-acronym-pattern) |
1662 (not (looking-at mail-extr-roman-numeral-pattern))) | 1662 (not (looking-at mail-extr-roman-numeral-pattern))) |
1663 | 1663 |
1664 ;; Stop before trailing alternative address | 1664 ;; Stop before trailing alternative address |
1665 (looking-at mail-extr-alternative-address-pattern) | 1665 (looking-at mail-extr-alternative-address-pattern) |
1666 | 1666 |
1667 ;; Stop before trailing comment not introduced by comma | 1667 ;; Stop before trailing comment not introduced by comma |
1668 ;; THIS CASE MUST BE AFTER AN EARLIER CASE. | 1668 ;; THIS CASE MUST BE AFTER AN EARLIER CASE. |
1669 (looking-at mail-extr-trailing-comment-start-pattern) | 1669 (looking-at mail-extr-trailing-comment-start-pattern) |
1670 | 1670 |
1671 ;; Stop before telephone numbers | 1671 ;; Stop before telephone numbers |
1672 (and (>= word-count 1) | 1672 (and (>= word-count 1) |
1673 (looking-at mail-extr-telephone-extension-pattern))) | 1673 (looking-at mail-extr-telephone-extension-pattern))) |
1674 (setq name-done-flag t)) | 1674 (setq name-done-flag t)) |
1675 | 1675 |
1676 ;; Delete ham radio call signs | 1676 ;; Delete ham radio call signs |
1677 ((looking-at mail-extr-ham-call-sign-pattern) | 1677 ((looking-at mail-extr-ham-call-sign-pattern) |
1678 (delete-region (match-beginning 0) (match-end 0))) | 1678 (delete-region (match-beginning 0) (match-end 0))) |
1679 | 1679 |
1680 ;; Fixup initials | 1680 ;; Fixup initials |
1681 ((looking-at mail-extr-initial-pattern) | 1681 ((looking-at mail-extr-initial-pattern) |
1682 (or (eq (following-char) (upcase (following-char))) | 1682 (or (eq (following-char) (upcase (following-char))) |
1683 (setq lower-case-flag t)) | 1683 (setq lower-case-flag t)) |
1684 (forward-char 1) | 1684 (forward-char 1) |
1686 (forward-char 1) | 1686 (forward-char 1) |
1687 (insert ?.)) | 1687 (insert ?.)) |
1688 (or (eq ?\ (following-char)) | 1688 (or (eq ?\ (following-char)) |
1689 (insert ?\ )) | 1689 (insert ?\ )) |
1690 (setq word-found-flag t)) | 1690 (setq word-found-flag t)) |
1691 | 1691 |
1692 ;; Handle BITNET LISTSERV list names. | 1692 ;; Handle BITNET LISTSERV list names. |
1693 ((and (eq word-count 0) | 1693 ((and (eq word-count 0) |
1694 (looking-at mail-extr-listserv-list-name-pattern)) | 1694 (looking-at mail-extr-listserv-list-name-pattern)) |
1695 (narrow-to-region (match-beginning 1) (match-end 1)) | 1695 (narrow-to-region (match-beginning 1) (match-end 1)) |
1696 (setq word-found-flag t) | 1696 (setq word-found-flag t) |
1697 (setq name-done-flag t)) | 1697 (setq name-done-flag t)) |
1698 | 1698 |
1699 ;; Handle & substitution, when & is last and is not first. | 1699 ;; Handle & substitution, when & is last and is not first. |
1700 ((and (> word-count 0) | 1700 ((and (> word-count 0) |
1701 (eq ?\ (preceding-char)) | 1701 (eq ?\ (preceding-char)) |
1702 (eq (following-char) ?&) | 1702 (eq (following-char) ?&) |
1703 (eq (1+ (point)) (point-max))) | 1703 (eq (1+ (point)) (point-max))) |
1720 | 1720 |
1721 ;; Regular name words | 1721 ;; Regular name words |
1722 ((looking-at mail-extr-name-pattern) | 1722 ((looking-at mail-extr-name-pattern) |
1723 (setq name-beg (point)) | 1723 (setq name-beg (point)) |
1724 (setq name-end (match-end 0)) | 1724 (setq name-end (match-end 0)) |
1725 | 1725 |
1726 ;; Certain words will be dropped if they are at the end. | 1726 ;; Certain words will be dropped if they are at the end. |
1727 (and (>= word-count 2) | 1727 (and (>= word-count 2) |
1728 (not lower-case-flag) | 1728 (not lower-case-flag) |
1729 (or | 1729 (or |
1730 ;; A trailing 4-or-more letter lowercase words preceded by | 1730 ;; A trailing 4-or-more letter lowercase words preceded by |
1731 ;; mixed case or uppercase words will be dropped. | 1731 ;; mixed case or uppercase words will be dropped. |
1732 (looking-at "[a-z][a-z][a-z][a-z]+[ \t]*\\'") | 1732 (looking-at "[a-z][a-z][a-z][a-z]+[ \t]*\\'") |
1733 ;; Drop a trailing word which is terminated with a period. | 1733 ;; Drop a trailing word which is terminated with a period. |
1734 (eq ?. (char-after (1- name-end)))) | 1734 (eq ?. (char-after (1- name-end)))) |
1735 (setq drop-this-word-if-trailing-flag t)) | 1735 (setq drop-this-word-if-trailing-flag t)) |
1736 | 1736 |
1737 ;; Set the flags that indicate whether we have seen a lowercase | 1737 ;; Set the flags that indicate whether we have seen a lowercase |
1738 ;; word, a mixed case word, and an uppercase word. | 1738 ;; word, a mixed case word, and an uppercase word. |
1739 (if (re-search-forward "[a-z]" name-end t) | 1739 (if (re-search-forward "[a-z]" name-end t) |
1740 (if (progn | 1740 (if (progn |
1741 (goto-char name-beg) | 1741 (goto-char name-beg) |
1742 (re-search-forward "[A-Z]" name-end t)) | 1742 (re-search-forward "[A-Z]" name-end t)) |
1743 (setq mixed-case-flag t) | 1743 (setq mixed-case-flag t) |
1744 (setq lower-case-flag t)) | 1744 (setq lower-case-flag t)) |
1745 ;; (setq upper-case-flag t) | 1745 ;; (setq upper-case-flag t) |
1746 ) | 1746 ) |
1747 | 1747 |
1748 (goto-char name-end) | 1748 (goto-char name-end) |
1749 (setq word-found-flag t)) | 1749 (setq word-found-flag t)) |
1750 | 1750 |
1751 ;; Allow a number as a word, if it doesn't mean anything else. | 1751 ;; Allow a number as a word, if it doesn't mean anything else. |
1752 ((looking-at "[0-9]+\\>") | 1752 ((looking-at "[0-9]+\\>") |
1756 (setq word-found-flag t)) | 1756 (setq word-found-flag t)) |
1757 | 1757 |
1758 (t | 1758 (t |
1759 (setq name-done-flag t) | 1759 (setq name-done-flag t) |
1760 )) | 1760 )) |
1761 | 1761 |
1762 ;; Count any word that we skipped over. | 1762 ;; Count any word that we skipped over. |
1763 (if word-found-flag | 1763 (if word-found-flag |
1764 (setq word-count (1+ word-count)))) | 1764 (setq word-count (1+ word-count)))) |
1765 | 1765 |
1766 ;; If the last thing in the name is 2 or more periods, or one or more | 1766 ;; If the last thing in the name is 2 or more periods, or one or more |
1767 ;; other sentence terminators (but not a single period) then keep them | 1767 ;; other sentence terminators (but not a single period) then keep them |
1768 ;; and the preceding word. This is for the benefit of whole sentences | 1768 ;; and the preceding word. This is for the benefit of whole sentences |
1769 ;; in the name field: it's better behavior than dropping the last word | 1769 ;; in the name field: it's better behavior than dropping the last word |
1770 ;; of the sentence... | 1770 ;; of the sentence... |
1775 ;; Drop everything after point and certain trailing words. | 1775 ;; Drop everything after point and certain trailing words. |
1776 (narrow-to-region (point-min) | 1776 (narrow-to-region (point-min) |
1777 (or (and drop-last-word-if-trailing-flag | 1777 (or (and drop-last-word-if-trailing-flag |
1778 last-word-beg) | 1778 last-word-beg) |
1779 (point))) | 1779 (point))) |
1780 | 1780 |
1781 ;; Xerox's mailers SUCK!!!!!! | 1781 ;; Xerox's mailers SUCK!!!!!! |
1782 ;; We simply refuse to believe that any last name is PARC or ADOC. | 1782 ;; We simply refuse to believe that any last name is PARC or ADOC. |
1783 ;; If it looks like that is the last name, that there is no meaningful | 1783 ;; If it looks like that is the last name, that there is no meaningful |
1784 ;; here at all. Actually I guess it would be best to map patterns | 1784 ;; here at all. Actually I guess it would be best to map patterns |
1785 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't | 1785 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't |
1800 (insert ?\ )) | 1800 (insert ?\ )) |
1801 (insert-buffer-substring (current-buffer) (point-min) name-end) | 1801 (insert-buffer-substring (current-buffer) (point-min) name-end) |
1802 (goto-char name-end) | 1802 (goto-char name-end) |
1803 (skip-chars-forward "\t ,") | 1803 (skip-chars-forward "\t ,") |
1804 (narrow-to-region (point) (point-max))) | 1804 (narrow-to-region (point) (point-max))) |
1805 | 1805 |
1806 ;; Delete leading and trailing junk characters. | 1806 ;; Delete leading and trailing junk characters. |
1807 ;; *** This is probably completely unneeded now. | 1807 ;; *** This is probably completely unneeded now. |
1808 ;;(goto-char (point-max)) | 1808 ;;(goto-char (point-max)) |
1809 ;;(skip-chars-backward mail-extr-non-end-name-chars) | 1809 ;;(skip-chars-backward mail-extr-non-end-name-chars) |
1810 ;;(if (eq ?. (following-char)) | 1810 ;;(if (eq ?. (following-char)) |
1812 ;;(narrow-to-region (point) | 1812 ;;(narrow-to-region (point) |
1813 ;; (progn | 1813 ;; (progn |
1814 ;; (goto-char (point-min)) | 1814 ;; (goto-char (point-min)) |
1815 ;; (skip-chars-forward mail-extr-non-begin-name-chars) | 1815 ;; (skip-chars-forward mail-extr-non-begin-name-chars) |
1816 ;; (point))) | 1816 ;; (point))) |
1817 | 1817 |
1818 ;; Compress whitespace | 1818 ;; Compress whitespace |
1819 (goto-char (point-min)) | 1819 (goto-char (point-min)) |
1820 (while (re-search-forward "[ \t\n]+" nil t) | 1820 (while (re-search-forward "[ \t\n]+" nil t) |
1821 (replace-match (if (eobp) "" " ") t)) | 1821 (replace-match (if (eobp) "" " ") t)) |
1822 ))) | 1822 ))) |
2130 (message "%s: %s" (upcase (symbol-name domain)) (get domain 'domain-name))) | 2130 (message "%s: %s" (upcase (symbol-name domain)) (get domain 'domain-name))) |
2131 | 2131 |
2132 | 2132 |
2133 ;(let ((all nil)) | 2133 ;(let ((all nil)) |
2134 ; (mapatoms #'(lambda (x) | 2134 ; (mapatoms #'(lambda (x) |
2135 ; (if (and (boundp x) | 2135 ; (if (and (boundp x) |
2136 ; (string-match "^mail-extr-" (symbol-name x))) | 2136 ; (string-match "^mail-extr-" (symbol-name x))) |
2137 ; (setq all (cons x all))))) | 2137 ; (setq all (cons x all))))) |
2138 ; (setq all (sort all #'string-lessp)) | 2138 ; (setq all (sort all #'string-lessp)) |
2139 ; (cons 'setq | 2139 ; (cons 'setq |
2140 ; (apply 'nconc (mapcar #'(lambda (x) | 2140 ; (apply 'nconc (mapcar #'(lambda (x) |