Mercurial > emacs
comparison lisp/mail/mail-extr.el @ 20285:3b0ad3d46bde
(mail-extract-address-components):
New arg ALL says return info about all the addresses.
Clarify buffer switching logic using save-excursion.
author | Karl Heuer <kwzh@gnu.org> |
---|---|
date | Thu, 20 Nov 1997 21:45:59 +0000 |
parents | 5458685d9a67 |
children | fd74979309d3 |
comparison
equal
deleted
inserted
replaced
20284:ff0f79a7b8b6 | 20285:3b0ad3d46bde |
---|---|
1 ;;; mail-extr.el --- extract full name and address from RFC 822 mail header. | 1 ;;; mail-extr.el --- extract full name and address from RFC 822 mail header. |
2 | 2 |
3 ;; Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc. | 3 ;; Copyright (C) 1991, 1992, 1993, 1994, 1997 Free Software Foundation, Inc. |
4 | 4 |
5 ;; Author: Joe Wells <jbw@cs.bu.edu> | 5 ;; Author: Joe Wells <jbw@cs.bu.edu> |
6 ;; Maintainer: Jamie Zawinski <jwz@lucid.com> | 6 ;; Maintainer: FSF |
7 ;; Version: 1.8 | |
8 ;; Keywords: mail | 7 ;; Keywords: mail |
9 | 8 |
10 ;; This file is part of GNU Emacs. | 9 ;; This file is part of GNU Emacs. |
11 | 10 |
12 ;; GNU Emacs is free software; you can redistribute it and/or modify | 11 ;; GNU Emacs is free software; you can redistribute it and/or modify |
26 | 25 |
27 ;;; Commentary: | 26 ;;; Commentary: |
28 | 27 |
29 ;; The entry point of this code is | 28 ;; The entry point of this code is |
30 ;; | 29 ;; |
31 ;; mail-extract-address-components: (address) | 30 ;; mail-extract-address-components: (address &optional all) |
32 ;; | 31 ;; |
33 ;; Given an RFC-822 ADDRESS, extract full name and canonical address. | 32 ;; Given an RFC-822 ADDRESS, extract full name and canonical address. |
34 ;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). | 33 ;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). |
35 ;; If no name can be extracted, FULL-NAME will be nil. | 34 ;; If no name can be extracted, FULL-NAME will be nil. |
36 ;; ADDRESS may be a string or a buffer. If it is a buffer, the visible | 35 ;; ADDRESS may be a string or a buffer. If it is a buffer, the visible |
37 ;; (narrowed) portion of the buffer will be interpreted as the address. | 36 ;; (narrowed) portion of the buffer will be interpreted as the address. |
38 ;; (This feature exists so that the clever caller might be able to avoid | 37 ;; (This feature exists so that the clever caller might be able to avoid |
39 ;; consing a string.) | 38 ;; consing a string.) |
40 ;; If ADDRESS contains more than one RFC-822 address, only the first is | 39 ;; If ADDRESS contains more than one RFC-822 address, only the first is |
41 ;; returned. | 40 ;; returned. |
41 ;; | |
42 ;; If ALL is non-nil, that means return info about all the addresses | |
43 ;; that are found in ADDRESS. The value is a list of elements of | |
44 ;; the form (FULL-NAME CANONICAL-ADDRESS), one per address. | |
42 ;; | 45 ;; |
43 ;; This code is more correct (and more heuristic) parser than the code in | 46 ;; This code is more correct (and more heuristic) parser than the code in |
44 ;; rfc822.el. And despite its size, it's fairly fast. | 47 ;; rfc822.el. And despite its size, it's fairly fast. |
45 ;; | 48 ;; |
46 ;; There are two main benefits: | 49 ;; There are two main benefits: |
704 (defvar disable-initial-guessing-flag) ; dynamic assignment | 707 (defvar disable-initial-guessing-flag) ; dynamic assignment |
705 (defvar cbeg) ; dynamic assignment | 708 (defvar cbeg) ; dynamic assignment |
706 (defvar cend) ; dynamic assignment | 709 (defvar cend) ; dynamic assignment |
707 | 710 |
708 ;;;###autoload | 711 ;;;###autoload |
709 (defun mail-extract-address-components (address) | 712 (defun mail-extract-address-components (address &optional all) |
710 "Given an RFC-822 ADDRESS, extract full name and canonical address. | 713 "Given an RFC-822 address ADDRESS, extract full name and canonical address. |
711 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). | 714 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). |
712 If no name can be extracted, FULL-NAME will be nil. | 715 If no name can be extracted, FULL-NAME will be nil. |
716 | |
717 If the optional argument ALL is non-nil, then ADDRESS can contain zero | |
718 or more recipients, separated by commas, and we return a list of | |
719 the form ((FULL-NAME CANONICAL-ADDRESS) ...) with one element for | |
720 each recipient. If ALL is nil, then if ADDRESS contains more than | |
721 one recipients, all but the first is ignored. | |
722 | |
713 ADDRESS may be a string or a buffer. If it is a buffer, the visible | 723 ADDRESS may be a string or a buffer. If it is a buffer, the visible |
714 (narrowed) portion of the buffer will be interpreted as the address. | 724 (narrowed) portion of the buffer will be interpreted as the address. |
715 (This feature exists so that the clever caller might be able to avoid | 725 (This feature exists so that the clever caller might be able to avoid |
716 consing a string.) | 726 consing a string.)" |
717 If ADDRESS contains more than one RFC-822 address, only the first is | |
718 returned. Some day this function may be extended to extract multiple | |
719 addresses, or perhaps return the position at which parsing stopped." | |
720 (let ((canonicalization-buffer (get-buffer-create " *canonical address*")) | 727 (let ((canonicalization-buffer (get-buffer-create " *canonical address*")) |
721 (extraction-buffer (get-buffer-create " *extract address components*")) | 728 (extraction-buffer (get-buffer-create " *extract address components*")) |
722 char | 729 value-list) |
723 ;; multiple-addresses | 730 |
724 <-pos >-pos @-pos :-pos comma-pos !-pos %-pos \;-pos | |
725 group-:-pos group-\;-pos route-addr-:-pos | |
726 record-pos-symbol | |
727 first-real-pos last-real-pos | |
728 phrase-beg phrase-end | |
729 cbeg cend ; dynamically set from -voodoo | |
730 quote-beg quote-end | |
731 atom-beg atom-end | |
732 mbox-beg mbox-end | |
733 \.-ends-name | |
734 temp | |
735 ;; name-suffix | |
736 fi mi li ; first, middle, last initial | |
737 saved-%-pos saved-!-pos saved-@-pos | |
738 domain-pos \.-pos insert-point | |
739 ;; mailbox-name-processed-flag | |
740 disable-initial-guessing-flag ; dynamically set from -voodoo | |
741 ) | |
742 | |
743 (save-excursion | 731 (save-excursion |
744 (set-buffer extraction-buffer) | 732 (set-buffer extraction-buffer) |
745 (fundamental-mode) | 733 (fundamental-mode) |
746 (kill-all-local-variables) | |
747 (buffer-disable-undo extraction-buffer) | 734 (buffer-disable-undo extraction-buffer) |
748 (set-syntax-table mail-extr-address-syntax-table) | 735 (set-syntax-table mail-extr-address-syntax-table) |
749 (widen) | 736 (widen) |
750 (erase-buffer) | 737 (erase-buffer) |
751 (setq case-fold-search nil) | 738 (setq case-fold-search nil) |
761 (insert-buffer-substring address)) | 748 (insert-buffer-substring address)) |
762 (t | 749 (t |
763 (error "Invalid address: %s" address))) | 750 (error "Invalid address: %s" address))) |
764 | 751 |
765 (set-text-properties (point-min) (point-max) nil) | 752 (set-text-properties (point-min) (point-max) nil) |
753 | |
754 (save-excursion | |
755 (set-buffer canonicalization-buffer) | |
756 (fundamental-mode) | |
757 (buffer-disable-undo canonicalization-buffer) | |
758 (set-syntax-table mail-extr-address-syntax-table) | |
759 (setq case-fold-search nil)) | |
760 | |
766 | 761 |
767 ;; stolen from rfc822.el | |
768 ;; Unfold multiple lines. | 762 ;; Unfold multiple lines. |
769 (goto-char (point-min)) | 763 (goto-char (point-min)) |
770 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) | 764 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) |
771 (replace-match "\\1 " t)) | 765 (replace-match "\\1 " t)) |
772 | 766 |
773 ;; first pass grabs useful information about address | 767 ;; Loop over addresses until we have as many as we want. |
774 (goto-char (point-min)) | 768 (while (and (or all (null value-list)) |
775 (while (progn | 769 (progn (goto-char (point-min)) |
776 (mail-extr-skip-whitespace-forward) | 770 (skip-chars-forward " \t") |
777 (not (eobp))) | 771 (not (eobp)))) |
778 (setq char (char-after (point))) | 772 (let (char |
779 (or first-real-pos | 773 end-of-address |
780 (if (not (eq char ?\()) | 774 <-pos >-pos @-pos :-pos comma-pos !-pos %-pos \;-pos |
781 (setq first-real-pos (point)))) | 775 group-:-pos group-\;-pos route-addr-:-pos |
782 (cond | 776 record-pos-symbol |
783 ;; comment | 777 first-real-pos last-real-pos |
784 ((eq char ?\() | 778 phrase-beg phrase-end |
785 (set-syntax-table mail-extr-address-comment-syntax-table) | 779 cbeg cend ; dynamically set from -voodoo |
786 ;; only record the first non-empty comment's position | 780 quote-beg quote-end |
787 (if (and (not cbeg) | 781 atom-beg atom-end |
782 mbox-beg mbox-end | |
783 \.-ends-name | |
784 temp | |
785 ;; name-suffix | |
786 fi mi li ; first, middle, last initial | |
787 saved-%-pos saved-!-pos saved-@-pos | |
788 domain-pos \.-pos insert-point | |
789 ;; mailbox-name-processed-flag | |
790 disable-initial-guessing-flag) ; dynamically set from -voodoo | |
791 | |
792 (goto-char (point-min)) | |
793 | |
794 ;; Insert extra space at beginning to allow later replacement with < | |
795 ;; without having to move markers. | |
796 (or (eq (following-char) ?\ ) | |
797 (insert ?\ )) | |
798 | |
799 ;; First pass grabs useful information about address. | |
800 (while (progn | |
801 (mail-extr-skip-whitespace-forward) | |
802 (not (eobp))) | |
803 (setq char (char-after (point))) | |
804 (or first-real-pos | |
805 (if (not (eq char ?\()) | |
806 (setq first-real-pos (point)))) | |
807 (cond | |
808 ;; comment | |
809 ((eq char ?\() | |
810 (set-syntax-table mail-extr-address-comment-syntax-table) | |
811 ;; only record the first non-empty comment's position | |
812 (if (and (not cbeg) | |
813 (save-excursion | |
814 (forward-char 1) | |
815 (mail-extr-skip-whitespace-forward) | |
816 (not (eq ?\) (char-after (point)))))) | |
817 (setq cbeg (point))) | |
818 ;; TODO: don't record if unbalanced | |
819 (or (mail-extr-safe-move-sexp 1) | |
820 (forward-char 1)) | |
821 (set-syntax-table mail-extr-address-syntax-table) | |
822 (if (and cbeg | |
823 (not cend)) | |
824 (setq cend (point)))) | |
825 ;; quoted text | |
826 ((eq char ?\") | |
827 ;; only record the first non-empty quote's position | |
828 (if (and (not quote-beg) | |
829 (save-excursion | |
830 (forward-char 1) | |
831 (mail-extr-skip-whitespace-forward) | |
832 (not (eq ?\" (char-after (point)))))) | |
833 (setq quote-beg (point))) | |
834 ;; TODO: don't record if unbalanced | |
835 (or (mail-extr-safe-move-sexp 1) | |
836 (forward-char 1)) | |
837 (if (and quote-beg | |
838 (not quote-end)) | |
839 (setq quote-end (point)))) | |
840 ;; domain literals | |
841 ((eq char ?\[) | |
842 (set-syntax-table mail-extr-address-domain-literal-syntax-table) | |
843 (or (mail-extr-safe-move-sexp 1) | |
844 (forward-char 1)) | |
845 (set-syntax-table mail-extr-address-syntax-table)) | |
846 ;; commas delimit addresses when outside < > pairs. | |
847 ((and (eq char ?,) | |
848 (or (and (null <-pos) | |
849 ;; Handle ROUTE-ADDR address that is missing its <. | |
850 (not (eq ?@ (char-after (1+ (point)))))) | |
851 (and >-pos | |
852 ;; handle weird munged addresses | |
853 ;; BUG FIX: This test was reversed. Thanks to the | |
854 ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au> | |
855 ;; for discovering this! | |
856 (< (mail-extr-last <-pos) (car >-pos))))) | |
857 ;; The argument contains more than one address. | |
858 ;; Temporarily hide everything after this one. | |
859 (setq end-of-address (copy-marker (1+ (point)))) | |
860 (narrow-to-region (point-min) (1+ (point))) | |
861 (mail-extr-delete-char 1) | |
862 (setq char ?\() ; HAVE I NO SHAME?? | |
863 ) | |
864 ;; record the position of various interesting chars, determine | |
865 ;; legality later. | |
866 ((setq record-pos-symbol | |
867 (cdr (assq char | |
868 '((?< . <-pos) (?> . >-pos) (?@ . @-pos) | |
869 (?: . :-pos) (?, . comma-pos) (?! . !-pos) | |
870 (?% . %-pos) (?\; . \;-pos))))) | |
871 (set record-pos-symbol | |
872 (cons (point) (symbol-value record-pos-symbol))) | |
873 (forward-char 1)) | |
874 ((eq char ?.) | |
875 (forward-char 1)) | |
876 ((memq char '( | |
877 ;; comment terminator illegal | |
878 ?\) | |
879 ;; domain literal terminator illegal | |
880 ?\] | |
881 ;; \ allowed only within quoted strings, | |
882 ;; domain literals, and comments | |
883 ?\\ | |
884 )) | |
885 (mail-extr-nuke-char-at (point)) | |
886 (forward-char 1)) | |
887 (t | |
888 (forward-word 1))) | |
889 (or (eq char ?\() | |
890 ;; At the end of first address of a multiple address header. | |
891 (and (eq char ?,) | |
892 (eobp)) | |
893 (setq last-real-pos (point)))) | |
894 | |
895 ;; Use only the leftmost <, if any. Replace all others with spaces. | |
896 (while (cdr <-pos) | |
897 (mail-extr-nuke-char-at (car <-pos)) | |
898 (setq <-pos (cdr <-pos))) | |
899 | |
900 ;; Use only the rightmost >, if any. Replace all others with spaces. | |
901 (while (cdr >-pos) | |
902 (mail-extr-nuke-char-at (nth 1 >-pos)) | |
903 (setcdr >-pos (nthcdr 2 >-pos))) | |
904 | |
905 ;; If multiple @s and a :, but no < and >, insert around buffer. | |
906 ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc | |
907 ;; This commonly happens on the UUCP "From " line. Ugh. | |
908 (cond ((and (> (length @-pos) 1) | |
909 (eq 1 (length :-pos)) ;TODO: check if between last two @s | |
910 (not \;-pos) | |
911 (not <-pos)) | |
912 (goto-char (point-min)) | |
913 (mail-extr-delete-char 1) | |
914 (setq <-pos (list (point))) | |
915 (insert ?<))) | |
916 | |
917 ;; If < but no >, insert > in rightmost possible position | |
918 (cond ((and <-pos | |
919 (null >-pos)) | |
920 (goto-char (point-max)) | |
921 (setq >-pos (list (point))) | |
922 (insert ?>))) | |
923 | |
924 ;; If > but no <, replace > with space. | |
925 (cond ((and >-pos | |
926 (null <-pos)) | |
927 (mail-extr-nuke-char-at (car >-pos)) | |
928 (setq >-pos nil))) | |
929 | |
930 ;; Turn >-pos and <-pos into non-lists | |
931 (setq >-pos (car >-pos) | |
932 <-pos (car <-pos)) | |
933 | |
934 ;; Trim other punctuation lists of items outside < > pair to handle | |
935 ;; stupid MTAs. | |
936 (cond (<-pos ; don't need to check >-pos also | |
937 ;; handle bozo software that violates RFC 822 by sticking | |
938 ;; punctuation marks outside of a < > pair | |
939 (mail-extr-nuke-outside-range @-pos <-pos >-pos t) | |
940 ;; RFC 822 says nothing about these two outside < >, but | |
941 ;; remove those positions from the lists to make things | |
942 ;; easier. | |
943 (mail-extr-nuke-outside-range !-pos <-pos >-pos t) | |
944 (mail-extr-nuke-outside-range %-pos <-pos >-pos t))) | |
945 | |
946 ;; Check for : that indicates GROUP list and for : part of | |
947 ;; ROUTE-ADDR spec. | |
948 ;; Can't possibly be more than two :. Nuke any extra. | |
949 (while :-pos | |
950 (setq temp (car :-pos) | |
951 :-pos (cdr :-pos)) | |
952 (cond ((and <-pos >-pos | |
953 (> temp <-pos) | |
954 (< temp >-pos)) | |
955 (if (or route-addr-:-pos | |
956 (< (length @-pos) 2) | |
957 (> temp (car @-pos)) | |
958 (< temp (nth 1 @-pos))) | |
959 (mail-extr-nuke-char-at temp) | |
960 (setq route-addr-:-pos temp))) | |
961 ((or (not <-pos) | |
962 (and <-pos | |
963 (< temp <-pos))) | |
964 (setq group-:-pos temp)))) | |
965 | |
966 ;; Nuke any ; that is in or to the left of a < > pair or to the left | |
967 ;; of a GROUP starting :. Also, there may only be one ;. | |
968 (while \;-pos | |
969 (setq temp (car \;-pos) | |
970 \;-pos (cdr \;-pos)) | |
971 (cond ((and <-pos >-pos | |
972 (> temp <-pos) | |
973 (< temp >-pos)) | |
974 (mail-extr-nuke-char-at temp)) | |
975 ((and (or (not group-:-pos) | |
976 (> temp group-:-pos)) | |
977 (not group-\;-pos)) | |
978 (setq group-\;-pos temp)))) | |
979 | |
980 ;; Nuke unmatched GROUP syntax characters. | |
981 (cond ((and group-:-pos (not group-\;-pos)) | |
982 ;; *** Do I really need to erase it? | |
983 (mail-extr-nuke-char-at group-:-pos) | |
984 (setq group-:-pos nil))) | |
985 (cond ((and group-\;-pos (not group-:-pos)) | |
986 ;; *** Do I really need to erase it? | |
987 (mail-extr-nuke-char-at group-\;-pos) | |
988 (setq group-\;-pos nil))) | |
989 | |
990 ;; Handle junk like ";@host.company.dom" that sendmail adds. | |
991 ;; **** should I remember comment positions? | |
992 (cond | |
993 (group-\;-pos | |
994 ;; this is fine for now | |
995 (mail-extr-nuke-outside-range !-pos group-:-pos group-\;-pos t) | |
996 (mail-extr-nuke-outside-range @-pos group-:-pos group-\;-pos t) | |
997 (mail-extr-nuke-outside-range %-pos group-:-pos group-\;-pos t) | |
998 (mail-extr-nuke-outside-range comma-pos group-:-pos group-\;-pos t) | |
999 (and last-real-pos | |
1000 (> last-real-pos (1+ group-\;-pos)) | |
1001 (setq last-real-pos (1+ group-\;-pos))) | |
1002 ;; *** This may be wrong: | |
1003 (and cend | |
1004 (> cend group-\;-pos) | |
1005 (setq cend nil | |
1006 cbeg nil)) | |
1007 (and quote-end | |
1008 (> quote-end group-\;-pos) | |
1009 (setq quote-end nil | |
1010 quote-beg nil)) | |
1011 ;; This was both wrong and unnecessary: | |
1012 ;;(narrow-to-region (point-min) group-\;-pos) | |
1013 | |
1014 ;; *** The entire handling of GROUP addresses seems rather lame. | |
1015 ;; *** It deserves a complete rethink, except that these addresses | |
1016 ;; *** are hardly ever seen. | |
1017 )) | |
1018 | |
1019 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any | |
1020 ;; others. | |
1021 ;; Hell, go ahead an nuke all of the commas. | |
1022 ;; **** This will cause problems when we start handling commas in | |
1023 ;; the PHRASE part .... no it won't ... yes it will ... ????? | |
1024 (mail-extr-nuke-outside-range comma-pos 1 1) | |
1025 | |
1026 ;; can only have multiple @s inside < >. The fact that some MTAs | |
1027 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is | |
1028 ;; handled above. | |
1029 | |
1030 ;; Locate PHRASE part of ROUTE-ADDR. | |
1031 (cond (<-pos | |
1032 (goto-char <-pos) | |
1033 (mail-extr-skip-whitespace-backward) | |
1034 (setq phrase-end (point)) | |
1035 (goto-char (or ;;group-:-pos | |
1036 (point-min))) | |
1037 (mail-extr-skip-whitespace-forward) | |
1038 (if (< (point) phrase-end) | |
1039 (setq phrase-beg (point)) | |
1040 (setq phrase-end nil)))) | |
1041 | |
1042 ;; handle ROUTE-ADDRS with real ROUTEs. | |
1043 ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and | |
1044 ;; any % or ! must be semantically meaningless. | |
1045 ;; TODO: do this processing into canonicalization buffer | |
1046 (cond (route-addr-:-pos | |
1047 (setq !-pos nil | |
1048 %-pos nil | |
1049 >-pos (copy-marker >-pos) | |
1050 route-addr-:-pos (copy-marker route-addr-:-pos)) | |
1051 (goto-char >-pos) | |
1052 (insert-before-markers ?X) | |
1053 (goto-char (car @-pos)) | |
1054 (while (setq @-pos (cdr @-pos)) | |
1055 (mail-extr-delete-char 1) | |
1056 (setq %-pos (cons (point-marker) %-pos)) | |
1057 (insert "%") | |
1058 (goto-char (1- >-pos)) | |
788 (save-excursion | 1059 (save-excursion |
789 (forward-char 1) | 1060 (insert-buffer-substring extraction-buffer |
790 (mail-extr-skip-whitespace-forward) | 1061 (car @-pos) route-addr-:-pos) |
791 (not (eq ?\) (char-after (point)))))) | 1062 (delete-region (car @-pos) route-addr-:-pos)) |
792 (setq cbeg (point))) | 1063 (or (cdr @-pos) |
793 ;; TODO: don't record if unbalanced | 1064 (setq saved-@-pos (list (point))))) |
794 (or (mail-extr-safe-move-sexp 1) | 1065 (setq @-pos saved-@-pos) |
795 (forward-char 1)) | 1066 (goto-char >-pos) |
796 (set-syntax-table mail-extr-address-syntax-table) | 1067 (mail-extr-delete-char -1) |
797 (if (and cbeg | 1068 (mail-extr-nuke-char-at route-addr-:-pos) |
798 (not cend)) | 1069 (mail-extr-demarkerize route-addr-:-pos) |
799 (setq cend (point)))) | 1070 (setq route-addr-:-pos nil |
800 ;; quoted text | 1071 >-pos (mail-extr-demarkerize >-pos) |
801 ((eq char ?\") | 1072 %-pos (mapcar 'mail-extr-demarkerize %-pos)))) |
802 ;; only record the first non-empty quote's position | 1073 |
803 (if (and (not quote-beg) | 1074 ;; de-listify @-pos |
804 (save-excursion | 1075 (setq @-pos (car @-pos)) |
805 (forward-char 1) | 1076 |
806 (mail-extr-skip-whitespace-forward) | 1077 ;; TODO: remove comments in the middle of an address |
807 (not (eq ?\" (char-after (point)))))) | 1078 |
808 (setq quote-beg (point))) | 1079 (save-excursion |
809 ;; TODO: don't record if unbalanced | 1080 (set-buffer canonicalization-buffer) |
810 (or (mail-extr-safe-move-sexp 1) | 1081 |
811 (forward-char 1)) | 1082 (widen) |
812 (if (and quote-beg | 1083 (erase-buffer) |
813 (not quote-end)) | 1084 (insert-buffer-substring extraction-buffer) |
814 (setq quote-end (point)))) | 1085 |
815 ;; domain literals | 1086 (if <-pos |
816 ((eq char ?\[) | 1087 (narrow-to-region (progn |
817 (set-syntax-table mail-extr-address-domain-literal-syntax-table) | 1088 (goto-char (1+ <-pos)) |
818 (or (mail-extr-safe-move-sexp 1) | 1089 (mail-extr-skip-whitespace-forward) |
819 (forward-char 1)) | 1090 (point)) |
820 (set-syntax-table mail-extr-address-syntax-table)) | 1091 >-pos) |
821 ;; commas delimit addresses when outside < > pairs. | 1092 (if (and first-real-pos last-real-pos) |
822 ((and (eq char ?,) | 1093 (narrow-to-region first-real-pos last-real-pos) |
823 (or (and (null <-pos) | 1094 ;; ****** Oh no! What if the address is completely empty! |
824 ;; Handle ROUTE-ADDR address that is missing its <. | 1095 ;; *** Is this correct? |
825 (not (eq ?@ (char-after (1+ (point)))))) | 1096 (narrow-to-region (point-max) (point-max)) |
826 (and >-pos | 1097 )) |
827 ;; handle weird munged addresses | 1098 |
828 ;; BUG FIX: This test was reversed. Thanks to the | 1099 (and @-pos %-pos |
829 ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au> | 1100 (mail-extr-nuke-outside-range %-pos (point-min) @-pos)) |
830 ;; for discovering this! | 1101 (and %-pos !-pos |
831 (< (mail-extr-last <-pos) (car >-pos))))) | 1102 (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos))) |
832 ;; It'd be great if some day this worked, but for now, punt. | 1103 (and @-pos !-pos (not %-pos) |
833 ;; (setq multiple-addresses t) | 1104 (mail-extr-nuke-outside-range !-pos (point-min) @-pos)) |
834 ;; ;; *** Why do I want this: | 1105 |
835 ;; (mail-extr-delete-char 1) | 1106 ;; Error condition:?? (and %-pos (not @-pos)) |
836 ;; (narrow-to-region (point-min) (point)) | 1107 |
837 (delete-region (point) (point-max)) | 1108 ;; WARNING: THIS CODE IS DUPLICATED BELOW. |
838 (setq char ?\() ; HAVE I NO SHAME?? | 1109 (cond ((and %-pos |
839 ) | 1110 (not @-pos)) |
840 ;; record the position of various interesting chars, determine | 1111 (goto-char (car %-pos)) |
841 ;; legality later. | 1112 (mail-extr-delete-char 1) |
842 ((setq record-pos-symbol | 1113 (setq @-pos (point)) |
843 (cdr (assq char | 1114 (insert "@") |
844 '((?< . <-pos) (?> . >-pos) (?@ . @-pos) | 1115 (setq %-pos (cdr %-pos)))) |
845 (?: . :-pos) (?, . comma-pos) (?! . !-pos) | 1116 |
846 (?% . %-pos) (?\; . \;-pos))))) | 1117 (if mail-extr-mangle-uucp |
847 (set record-pos-symbol | 1118 (cond (!-pos |
848 (cons (point) (symbol-value record-pos-symbol))) | 1119 ;; **** I don't understand this save-restriction and the |
849 (forward-char 1)) | 1120 ;; narrow-to-region inside it. Why did I do that? |
850 ((eq char ?.) | 1121 (save-restriction |
851 (forward-char 1)) | 1122 (cond ((and @-pos |
852 ((memq char '( | 1123 mail-extr-@-binds-tighter-than-!) |
853 ;; comment terminator illegal | 1124 (goto-char @-pos) |
854 ?\) | 1125 (setq %-pos (cons (point) %-pos) |
855 ;; domain literal terminator illegal | 1126 @-pos nil) |
856 ?\] | 1127 (mail-extr-delete-char 1) |
857 ;; \ allowed only within quoted strings, | 1128 (insert "%") |
858 ;; domain literals, and comments | 1129 (setq insert-point (point-max))) |
859 ?\\ | 1130 (mail-extr-@-binds-tighter-than-! |
860 )) | 1131 (setq insert-point (point-max))) |
861 (mail-extr-nuke-char-at (point)) | 1132 (%-pos |
862 (forward-char 1)) | 1133 (setq insert-point (mail-extr-last %-pos) |
863 (t | 1134 saved-%-pos (mapcar 'mail-extr-markerize %-pos) |
864 (forward-word 1))) | 1135 %-pos nil |
865 (or (eq char ?\() | 1136 @-pos (mail-extr-markerize @-pos))) |
866 ;; At the end of first address of a multiple address header. | 1137 (@-pos |
867 (and (eq char ?,) | 1138 (setq insert-point @-pos) |
868 (eobp)) | 1139 (setq @-pos (mail-extr-markerize @-pos))) |
869 (setq last-real-pos (point)))) | 1140 (t |
870 | 1141 (setq insert-point (point-max)))) |
871 ;; Use only the leftmost <, if any. Replace all others with spaces. | 1142 (narrow-to-region (point-min) insert-point) |
872 (while (cdr <-pos) | 1143 (setq saved-!-pos (car !-pos)) |
873 (mail-extr-nuke-char-at (car <-pos)) | 1144 (while !-pos |
874 (setq <-pos (cdr <-pos))) | 1145 (goto-char (point-max)) |
875 | 1146 (cond ((and (not @-pos) |
876 ;; Use only the rightmost >, if any. Replace all others with spaces. | 1147 (not (cdr !-pos))) |
877 (while (cdr >-pos) | 1148 (setq @-pos (point)) |
878 (mail-extr-nuke-char-at (nth 1 >-pos)) | 1149 (insert-before-markers "@ ")) |
879 (setcdr >-pos (nthcdr 2 >-pos))) | 1150 (t |
880 | 1151 (setq %-pos (cons (point) %-pos)) |
881 ;; If multiple @s and a :, but no < and >, insert around buffer. | 1152 (insert-before-markers "% "))) |
882 ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc | 1153 (backward-char 1) |
883 ;; This commonly happens on the UUCP "From " line. Ugh. | 1154 (insert-buffer-substring |
884 (cond ((and (> (length @-pos) 1) | 1155 (current-buffer) |
885 (eq 1 (length :-pos)) ;TODO: check if between last two @s | 1156 (if (nth 1 !-pos) |
886 (not \;-pos) | 1157 (1+ (nth 1 !-pos)) |
887 (not <-pos)) | 1158 (point-min)) |
888 (goto-char (point-min)) | 1159 (car !-pos)) |
889 (mail-extr-delete-char 1) | 1160 (mail-extr-delete-char 1) |
890 (setq <-pos (list (point))) | 1161 (or (save-excursion |
891 (insert ?<))) | 1162 (mail-extr-safe-move-sexp -1) |
892 | 1163 (mail-extr-skip-whitespace-backward) |
893 ;; If < but no >, insert > in rightmost possible position | 1164 (eq ?. (preceding-char))) |
894 (cond ((and <-pos | 1165 (insert-before-markers |
895 (null >-pos)) | 1166 (if (save-excursion |
896 (goto-char (point-max)) | 1167 (mail-extr-skip-whitespace-backward) |
897 (setq >-pos (list (point))) | 1168 (eq ?. (preceding-char))) |
898 (insert ?>))) | 1169 "" |
899 | 1170 ".") |
900 ;; If > but no <, replace > with space. | 1171 "uucp")) |
901 (cond ((and >-pos | 1172 (setq !-pos (cdr !-pos)))) |
902 (null <-pos)) | 1173 (and saved-%-pos |
903 (mail-extr-nuke-char-at (car >-pos)) | 1174 (setq %-pos (append (mapcar 'mail-extr-demarkerize |
904 (setq >-pos nil))) | 1175 saved-%-pos) |
905 | 1176 %-pos))) |
906 ;; Turn >-pos and <-pos into non-lists | 1177 (setq @-pos (mail-extr-demarkerize @-pos)) |
907 (setq >-pos (car >-pos) | 1178 (narrow-to-region (1+ saved-!-pos) (point-max))))) |
908 <-pos (car <-pos)) | 1179 |
909 | 1180 ;; WARNING: THIS CODE IS DUPLICATED ABOVE. |
910 ;; Trim other punctuation lists of items outside < > pair to handle | 1181 (cond ((and %-pos |
911 ;; stupid MTAs. | 1182 (not @-pos)) |
912 (cond (<-pos ; don't need to check >-pos also | 1183 (goto-char (car %-pos)) |
913 ;; handle bozo software that violates RFC 822 by sticking | 1184 (mail-extr-delete-char 1) |
914 ;; punctuation marks outside of a < > pair | 1185 (setq @-pos (point)) |
915 (mail-extr-nuke-outside-range @-pos <-pos >-pos t) | 1186 (insert "@") |
916 ;; RFC 822 says nothing about these two outside < >, but | 1187 (setq %-pos (cdr %-pos)))) |
917 ;; remove those positions from the lists to make things | 1188 |
918 ;; easier. | 1189 (setq %-pos (nreverse %-pos)) |
919 (mail-extr-nuke-outside-range !-pos <-pos >-pos t) | 1190 (cond (%-pos ; implies @-pos valid |
920 (mail-extr-nuke-outside-range %-pos <-pos >-pos t))) | 1191 (setq temp %-pos) |
921 | 1192 (catch 'truncated |
922 ;; Check for : that indicates GROUP list and for : part of | 1193 (while temp |
923 ;; ROUTE-ADDR spec. | 1194 (goto-char (or (nth 1 temp) |
924 ;; Can't possibly be more than two :. Nuke any extra. | 1195 @-pos)) |
925 (while :-pos | 1196 (mail-extr-skip-whitespace-backward) |
926 (setq temp (car :-pos) | 1197 (save-excursion |
927 :-pos (cdr :-pos)) | 1198 (mail-extr-safe-move-sexp -1) |
928 (cond ((and <-pos >-pos | 1199 (setq domain-pos (point)) |
929 (> temp <-pos) | 1200 (mail-extr-skip-whitespace-backward) |
930 (< temp >-pos)) | 1201 (setq \.-pos (eq ?. (preceding-char)))) |
931 (if (or route-addr-:-pos | 1202 (cond ((and \.-pos |
932 (< (length @-pos) 2) | 1203 ;; #### string consing |
933 (> temp (car @-pos)) | 1204 (let ((s (intern-soft |
934 (< temp (nth 1 @-pos))) | 1205 (buffer-substring domain-pos (point)) |
935 (mail-extr-nuke-char-at temp) | 1206 mail-extr-all-top-level-domains))) |
936 (setq route-addr-:-pos temp))) | 1207 (and s (get s 'domain-name)))) |
937 ((or (not <-pos) | 1208 (narrow-to-region (point-min) (point)) |
938 (and <-pos | 1209 (goto-char (car temp)) |
939 (< temp <-pos))) | 1210 (mail-extr-delete-char 1) |
940 (setq group-:-pos temp)))) | 1211 (setq @-pos (point)) |
941 | 1212 (setcdr temp nil) |
942 ;; Nuke any ; that is in or to the left of a < > pair or to the left | 1213 (setq %-pos (delq @-pos %-pos)) |
943 ;; of a GROUP starting :. Also, there may only be one ;. | 1214 (insert "@") |
944 (while \;-pos | 1215 (throw 'truncated t))) |
945 (setq temp (car \;-pos) | 1216 (setq temp (cdr temp)))))) |
946 \;-pos (cdr \;-pos)) | 1217 (setq mbox-beg (point-min) |
947 (cond ((and <-pos >-pos | 1218 mbox-end (if %-pos (car %-pos) |
948 (> temp <-pos) | 1219 (or @-pos |
949 (< temp >-pos)) | 1220 (point-max))))) |
950 (mail-extr-nuke-char-at temp)) | 1221 |
951 ((and (or (not group-:-pos) | 1222 ;; Done canonicalizing address. |
952 (> temp group-:-pos)) | 1223 ;; We are now back in extraction-buffer. |
953 (not group-\;-pos)) | 1224 |
954 (setq group-\;-pos temp)))) | 1225 ;; Decide what part of the address to search to find the full name. |
955 | 1226 (cond ( |
956 ;; Nuke unmatched GROUP syntax characters. | 1227 ;; Example: "First M. Last" <fml@foo.bar.dom> |
957 (cond ((and group-:-pos (not group-\;-pos)) | 1228 (and phrase-beg |
958 ;; *** Do I really need to erase it? | 1229 (eq quote-beg phrase-beg) |
959 (mail-extr-nuke-char-at group-:-pos) | 1230 (<= quote-end phrase-end)) |
960 (setq group-:-pos nil))) | 1231 (narrow-to-region (1+ quote-beg) (1- quote-end)) |
961 (cond ((and group-\;-pos (not group-:-pos)) | 1232 (mail-extr-undo-backslash-quoting (point-min) (point-max))) |
962 ;; *** Do I really need to erase it? | 1233 |
963 (mail-extr-nuke-char-at group-\;-pos) | 1234 ;; Example: First Last <fml@foo.bar.dom> |
964 (setq group-\;-pos nil))) | 1235 (phrase-beg |
965 | 1236 (narrow-to-region phrase-beg phrase-end)) |
966 ;; Handle junk like ";@host.company.dom" that sendmail adds. | 1237 |
967 ;; **** should I remember comment positions? | 1238 ;; Example: fml@foo.bar.dom (First M. Last) |
968 (cond | 1239 (cbeg |
969 (group-\;-pos | 1240 (narrow-to-region (1+ cbeg) (1- cend)) |
970 ;; this is fine for now | 1241 (mail-extr-undo-backslash-quoting (point-min) (point-max)) |
971 (mail-extr-nuke-outside-range !-pos group-:-pos group-\;-pos t) | 1242 |
972 (mail-extr-nuke-outside-range @-pos group-:-pos group-\;-pos t) | 1243 ;; Deal with spacing problems |
973 (mail-extr-nuke-outside-range %-pos group-:-pos group-\;-pos t) | 1244 (goto-char (point-min)) |
974 (mail-extr-nuke-outside-range comma-pos group-:-pos group-\;-pos t) | 1245 ;;; (cond ((not (search-forward " " nil t)) |
975 (and last-real-pos | 1246 ;;; (goto-char (point-min)) |
976 (> last-real-pos (1+ group-\;-pos)) | 1247 ;;; (cond ((search-forward "_" nil t) |
977 (setq last-real-pos (1+ group-\;-pos))) | 1248 ;;; ;; Handle the *idiotic* use of underlines as spaces. |
978 ;; *** This may be wrong: | 1249 ;;; ;; Example: fml@foo.bar.dom (First_M._Last) |
979 (and cend | 1250 ;;; (goto-char (point-min)) |
980 (> cend group-\;-pos) | 1251 ;;; (while (search-forward "_" nil t) |
981 (setq cend nil | 1252 ;;; (replace-match " " t))) |
982 cbeg nil)) | 1253 ;;; ((search-forward "." nil t) |
983 (and quote-end | 1254 ;;; ;; Fix . used as space |
984 (> quote-end group-\;-pos) | 1255 ;;; ;; Example: danj1@cb.att.com (daniel.jacobson) |
985 (setq quote-end nil | 1256 ;;; (goto-char (point-min)) |
986 quote-beg nil)) | 1257 ;;; (while (re-search-forward mail-extr-bad-dot-pattern nil t) |
987 ;; This was both wrong and unnecessary: | 1258 ;;; (replace-match "\\1 \\2" t)))))) |
988 ;;(narrow-to-region (point-min) group-\;-pos) | 1259 ) |
989 | 1260 |
990 ;; *** The entire handling of GROUP addresses seems rather lame. | 1261 ;; Otherwise we try to get the name from the mailbox portion |
991 ;; *** It deserves a complete rethink, except that these addresses | 1262 ;; of the address. |
992 ;; *** are hardly ever seen. | 1263 ;; Example: First_M_Last@foo.bar.dom |
993 )) | 1264 (t |
994 | 1265 ;; *** Work in canon buffer instead? No, can't. Hmm. |
995 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any | |
996 ;; others. | |
997 ;; Hell, go ahead an nuke all of the commas. | |
998 ;; **** This will cause problems when we start handling commas in | |
999 ;; the PHRASE part .... no it won't ... yes it will ... ????? | |
1000 (mail-extr-nuke-outside-range comma-pos 1 1) | |
1001 | |
1002 ;; can only have multiple @s inside < >. The fact that some MTAs | |
1003 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is | |
1004 ;; handled above. | |
1005 | |
1006 ;; Locate PHRASE part of ROUTE-ADDR. | |
1007 (cond (<-pos | |
1008 (goto-char <-pos) | |
1009 (mail-extr-skip-whitespace-backward) | |
1010 (setq phrase-end (point)) | |
1011 (goto-char (or ;;group-:-pos | |
1012 (point-min))) | |
1013 (mail-extr-skip-whitespace-forward) | |
1014 (if (< (point) phrase-end) | |
1015 (setq phrase-beg (point)) | |
1016 (setq phrase-end nil)))) | |
1017 | |
1018 ;; handle ROUTE-ADDRS with real ROUTEs. | |
1019 ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and | |
1020 ;; any % or ! must be semantically meaningless. | |
1021 ;; TODO: do this processing into canonicalization buffer | |
1022 (cond (route-addr-:-pos | |
1023 (setq !-pos nil | |
1024 %-pos nil | |
1025 >-pos (copy-marker >-pos) | |
1026 route-addr-:-pos (copy-marker route-addr-:-pos)) | |
1027 (goto-char >-pos) | |
1028 (insert-before-markers ?X) | |
1029 (goto-char (car @-pos)) | |
1030 (while (setq @-pos (cdr @-pos)) | |
1031 (mail-extr-delete-char 1) | |
1032 (setq %-pos (cons (point-marker) %-pos)) | |
1033 (insert "%") | |
1034 (goto-char (1- >-pos)) | |
1035 (save-excursion | |
1036 (insert-buffer-substring extraction-buffer | |
1037 (car @-pos) route-addr-:-pos) | |
1038 (delete-region (car @-pos) route-addr-:-pos)) | |
1039 (or (cdr @-pos) | |
1040 (setq saved-@-pos (list (point))))) | |
1041 (setq @-pos saved-@-pos) | |
1042 (goto-char >-pos) | |
1043 (mail-extr-delete-char -1) | |
1044 (mail-extr-nuke-char-at route-addr-:-pos) | |
1045 (mail-extr-demarkerize route-addr-:-pos) | |
1046 (setq route-addr-:-pos nil | |
1047 >-pos (mail-extr-demarkerize >-pos) | |
1048 %-pos (mapcar 'mail-extr-demarkerize %-pos)))) | |
1049 | |
1050 ;; de-listify @-pos | |
1051 (setq @-pos (car @-pos)) | |
1052 | |
1053 ;; TODO: remove comments in the middle of an address | |
1054 | |
1055 (set-buffer canonicalization-buffer) | |
1056 (fundamental-mode) | |
1057 (kill-all-local-variables) | |
1058 (buffer-disable-undo canonicalization-buffer) | |
1059 (set-syntax-table mail-extr-address-syntax-table) | |
1060 (setq case-fold-search nil) | |
1061 | |
1062 (widen) | |
1063 (erase-buffer) | |
1064 (insert-buffer-substring extraction-buffer) | |
1065 | |
1066 (if <-pos | |
1067 (narrow-to-region (progn | |
1068 (goto-char (1+ <-pos)) | |
1069 (mail-extr-skip-whitespace-forward) | |
1070 (point)) | |
1071 >-pos) | |
1072 (if (and first-real-pos last-real-pos) | |
1073 (narrow-to-region first-real-pos last-real-pos) | |
1074 ;; ****** Oh no! What if the address is completely empty! | |
1075 ;; *** Is this correct? | |
1076 (narrow-to-region (point-max) (point-max)) | |
1077 )) | |
1078 | |
1079 (and @-pos %-pos | |
1080 (mail-extr-nuke-outside-range %-pos (point-min) @-pos)) | |
1081 (and %-pos !-pos | |
1082 (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos))) | |
1083 (and @-pos !-pos (not %-pos) | |
1084 (mail-extr-nuke-outside-range !-pos (point-min) @-pos)) | |
1085 | |
1086 ;; Error condition:?? (and %-pos (not @-pos)) | |
1087 | |
1088 ;; WARNING: THIS CODE IS DUPLICATED BELOW. | |
1089 (cond ((and %-pos | |
1090 (not @-pos)) | |
1091 (goto-char (car %-pos)) | |
1092 (mail-extr-delete-char 1) | |
1093 (setq @-pos (point)) | |
1094 (insert "@") | |
1095 (setq %-pos (cdr %-pos)))) | |
1096 | |
1097 (if mail-extr-mangle-uucp | |
1098 (cond (!-pos | |
1099 ;; **** I don't understand this save-restriction and the | |
1100 ;; narrow-to-region inside it. Why did I do that? | |
1101 (save-restriction | |
1102 (cond ((and @-pos | |
1103 mail-extr-@-binds-tighter-than-!) | |
1104 (goto-char @-pos) | |
1105 (setq %-pos (cons (point) %-pos) | |
1106 @-pos nil) | |
1107 (mail-extr-delete-char 1) | |
1108 (insert "%") | |
1109 (setq insert-point (point-max))) | |
1110 (mail-extr-@-binds-tighter-than-! | |
1111 (setq insert-point (point-max))) | |
1112 (%-pos | |
1113 (setq insert-point (mail-extr-last %-pos) | |
1114 saved-%-pos (mapcar 'mail-extr-markerize %-pos) | |
1115 %-pos nil | |
1116 @-pos (mail-extr-markerize @-pos))) | |
1117 (@-pos | |
1118 (setq insert-point @-pos) | |
1119 (setq @-pos (mail-extr-markerize @-pos))) | |
1120 (t | |
1121 (setq insert-point (point-max)))) | |
1122 (narrow-to-region (point-min) insert-point) | |
1123 (setq saved-!-pos (car !-pos)) | |
1124 (while !-pos | |
1125 (goto-char (point-max)) | 1266 (goto-char (point-max)) |
1126 (cond ((and (not @-pos) | 1267 (narrow-to-region (point) (point)) |
1127 (not (cdr !-pos))) | 1268 (insert-buffer-substring canonicalization-buffer |
1128 (setq @-pos (point)) | 1269 mbox-beg mbox-end) |
1129 (insert-before-markers "@ ")) | 1270 (goto-char (point-min)) |
1130 (t | 1271 |
1131 (setq %-pos (cons (point) %-pos)) | 1272 ;; Example: First_Last.XXX@foo.bar.dom |
1132 (insert-before-markers "% "))) | 1273 (setq \.-ends-name (re-search-forward "[_0-9]" nil t)) |
1133 (backward-char 1) | 1274 |
1134 (insert-buffer-substring | 1275 (goto-char (point-min)) |
1135 (current-buffer) | 1276 |
1136 (if (nth 1 !-pos) | 1277 (if (not mail-extr-mangle-uucp) |
1137 (1+ (nth 1 !-pos)) | 1278 (modify-syntax-entry ?! "w" (syntax-table))) |
1138 (point-min)) | 1279 |
1139 (car !-pos)) | 1280 (while (progn |
1140 (mail-extr-delete-char 1) | 1281 (mail-extr-skip-whitespace-forward) |
1141 (or (save-excursion | 1282 (not (eobp))) |
1142 (mail-extr-safe-move-sexp -1) | 1283 (setq char (char-after (point))) |
1143 (mail-extr-skip-whitespace-backward) | 1284 (cond |
1144 (eq ?. (preceding-char))) | 1285 ((eq char ?\") |
1145 (insert-before-markers | 1286 (setq quote-beg (point)) |
1146 (if (save-excursion | 1287 (or (mail-extr-safe-move-sexp 1) |
1147 (mail-extr-skip-whitespace-backward) | 1288 ;; TODO: handle this error condition!!!!! |
1148 (eq ?. (preceding-char))) | 1289 (forward-char 1)) |
1149 "" | 1290 ;; take into account deletions |
1150 ".") | 1291 (setq quote-end (- (point) 2)) |
1151 "uucp")) | 1292 (save-excursion |
1152 (setq !-pos (cdr !-pos)))) | 1293 (backward-char 1) |
1153 (and saved-%-pos | 1294 (mail-extr-delete-char 1) |
1154 (setq %-pos (append (mapcar 'mail-extr-demarkerize | 1295 (goto-char quote-beg) |
1155 saved-%-pos) | 1296 (or (eobp) |
1156 %-pos))) | 1297 (mail-extr-delete-char 1))) |
1157 (setq @-pos (mail-extr-demarkerize @-pos)) | 1298 (mail-extr-undo-backslash-quoting quote-beg quote-end) |
1158 (narrow-to-region (1+ saved-!-pos) (point-max))))) | 1299 (or (eq ?\ (char-after (point))) |
1159 | 1300 (insert " ")) |
1160 ;; WARNING: THIS CODE IS DUPLICATED ABOVE. | 1301 ;; (setq mailbox-name-processed-flag t) |
1161 (cond ((and %-pos | 1302 (setq \.-ends-name t)) |
1162 (not @-pos)) | 1303 ((eq char ?.) |
1163 (goto-char (car %-pos)) | 1304 (if (memq (char-after (1+ (point))) '(?_ ?=)) |
1164 (mail-extr-delete-char 1) | 1305 (progn |
1165 (setq @-pos (point)) | 1306 (forward-char 1) |
1166 (insert "@") | 1307 (mail-extr-delete-char 1) |
1167 (setq %-pos (cdr %-pos)))) | 1308 (insert ?\ )) |
1168 | 1309 (if \.-ends-name |
1169 (setq %-pos (nreverse %-pos)) | 1310 (narrow-to-region (point-min) (point)) |
1170 (cond (%-pos ; implies @-pos valid | 1311 (mail-extr-delete-char 1) |
1171 (setq temp %-pos) | 1312 (insert " "))) |
1172 (catch 'truncated | 1313 ;; (setq mailbox-name-processed-flag t) |
1173 (while temp | 1314 ) |
1174 (goto-char (or (nth 1 temp) | 1315 ((memq (char-syntax char) '(?. ?\\)) |
1175 @-pos)) | 1316 (mail-extr-delete-char 1) |
1176 (mail-extr-skip-whitespace-backward) | 1317 (insert " ") |
1318 ;; (setq mailbox-name-processed-flag t) | |
1319 ) | |
1320 (t | |
1321 (setq atom-beg (point)) | |
1322 (forward-word 1) | |
1323 (setq atom-end (point)) | |
1324 (goto-char atom-beg) | |
1325 (save-restriction | |
1326 (narrow-to-region atom-beg atom-end) | |
1327 (cond | |
1328 | |
1329 ;; Handle X.400 addresses encoded in RFC-822. | |
1330 ;; *** Shit! This has to handle the case where it is | |
1331 ;; *** embedded in a quote too! | |
1332 ;; *** Shit! The input is being broken up into atoms | |
1333 ;; *** by periods! | |
1334 ((looking-at mail-extr-x400-encoded-address-pattern) | |
1335 | |
1336 ;; Copy the contents of the individual fields that | |
1337 ;; might hold name data to the beginning. | |
1338 (mapcar | |
1339 (function | |
1340 (lambda (field-pattern) | |
1341 (cond | |
1342 ((save-excursion | |
1343 (re-search-forward field-pattern nil t)) | |
1344 (insert-buffer-substring (current-buffer) | |
1345 (match-beginning 1) | |
1346 (match-end 1)) | |
1347 (insert " "))))) | |
1348 (list mail-extr-x400-encoded-address-given-name-pattern | |
1349 mail-extr-x400-encoded-address-surname-pattern | |
1350 mail-extr-x400-encoded-address-full-name-pattern)) | |
1351 | |
1352 ;; Discard the rest, since it contains stuff like | |
1353 ;; routing information, not part of a name. | |
1354 (mail-extr-skip-whitespace-backward) | |
1355 (delete-region (point) (point-max)) | |
1356 | |
1357 ;; Handle periods used for spacing. | |
1358 (while (re-search-forward mail-extr-bad-dot-pattern nil t) | |
1359 (replace-match "\\1 \\2" t)) | |
1360 | |
1361 ;; (setq mailbox-name-processed-flag t) | |
1362 ) | |
1363 | |
1364 ;; Handle normal addresses. | |
1365 (t | |
1366 (goto-char (point-min)) | |
1367 ;; Handle _ and = used for spacing. | |
1368 (while (re-search-forward "\\([^_=]+\\)[_=]" nil t) | |
1369 (replace-match "\\1 " t) | |
1370 ;; (setq mailbox-name-processed-flag t) | |
1371 ) | |
1372 (goto-char (point-max)))))))) | |
1373 | |
1374 ;; undo the dirty deed | |
1375 (if (not mail-extr-mangle-uucp) | |
1376 (modify-syntax-entry ?! "." (syntax-table))) | |
1377 ;; | |
1378 ;; If we derived the name from the mailbox part of the address, | |
1379 ;; and we only got one word out of it, don't treat that as a | |
1380 ;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar") | |
1381 ;; (if (not mailbox-name-processed-flag) | |
1382 ;; (delete-region (point-min) (point-max))) | |
1383 )) | |
1384 | |
1385 (set-syntax-table mail-extr-address-text-syntax-table) | |
1386 | |
1387 (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer) | |
1388 (goto-char (point-min)) | |
1389 | |
1390 ;; If name is "First Last" and userid is "F?L", then assume | |
1391 ;; the middle initial is the second letter in the userid. | |
1392 ;; Initial code by Jamie Zawinski <jwz@lucid.com> | |
1393 ;; *** Make it work when there's a suffix as well. | |
1394 (goto-char (point-min)) | |
1395 (cond ((and mail-extr-guess-middle-initial | |
1396 (not disable-initial-guessing-flag) | |
1397 (eq 3 (- mbox-end mbox-beg)) | |
1398 (progn | |
1399 (goto-char (point-min)) | |
1400 (looking-at mail-extr-two-name-pattern))) | |
1401 (setq fi (char-after (match-beginning 0)) | |
1402 li (char-after (match-beginning 3))) | |
1177 (save-excursion | 1403 (save-excursion |
1178 (mail-extr-safe-move-sexp -1) | 1404 (set-buffer canonicalization-buffer) |
1179 (setq domain-pos (point)) | 1405 ;; char-equal is ignoring case here, so no need to upcase |
1180 (mail-extr-skip-whitespace-backward) | 1406 ;; or downcase. |
1181 (setq \.-pos (eq ?. (preceding-char)))) | 1407 (let ((case-fold-search t)) |
1182 (cond ((and \.-pos | 1408 (and (char-equal fi (char-after mbox-beg)) |
1183 ;; #### string consing | 1409 (char-equal li (char-after (1- mbox-end))) |
1184 (let ((s (intern-soft | 1410 (setq mi (char-after (1+ mbox-beg)))))) |
1185 (buffer-substring domain-pos (point)) | 1411 (cond ((and mi |
1186 mail-extr-all-top-level-domains))) | 1412 ;; TODO: use better table than syntax table |
1187 (and s (get s 'domain-name)))) | 1413 (eq ?w (char-syntax mi))) |
1188 (narrow-to-region (point-min) (point)) | 1414 (goto-char (match-beginning 3)) |
1189 (goto-char (car temp)) | 1415 (insert (upcase mi) ". "))))) |
1190 (mail-extr-delete-char 1) | 1416 |
1191 (setq @-pos (point)) | 1417 ;; Nuke name if it is the same as mailbox name. |
1192 (setcdr temp nil) | 1418 (let ((buffer-length (- (point-max) (point-min))) |
1193 (setq %-pos (delq @-pos %-pos)) | 1419 (i 0) |
1194 (insert "@") | 1420 (names-match-flag t)) |
1195 (throw 'truncated t))) | 1421 (cond ((and (> buffer-length 0) |
1196 (setq temp (cdr temp)))))) | 1422 (eq buffer-length (- mbox-end mbox-beg))) |
1197 (setq mbox-beg (point-min) | 1423 (goto-char (point-max)) |
1198 mbox-end (if %-pos (car %-pos) | 1424 (insert-buffer-substring canonicalization-buffer |
1199 (or @-pos | 1425 mbox-beg mbox-end) |
1200 (point-max)))) | 1426 (while (and names-match-flag |
1201 | 1427 (< i buffer-length)) |
1202 ;; Done canonicalizing address. | 1428 (or (eq (downcase (char-after (+ i (point-min)))) |
1203 | 1429 (downcase |
1204 (set-buffer extraction-buffer) | 1430 (char-after (+ i buffer-length (point-min))))) |
1205 | 1431 (setq names-match-flag nil)) |
1206 ;; Decide what part of the address to search to find the full name. | 1432 (setq i (1+ i))) |
1207 (cond ( | 1433 (delete-region (+ (point-min) buffer-length) (point-max)) |
1208 ;; Example: "First M. Last" <fml@foo.bar.dom> | 1434 (if names-match-flag |
1209 (and phrase-beg | 1435 (narrow-to-region (point) (point)))))) |
1210 (eq quote-beg phrase-beg) | 1436 |
1211 (<= quote-end phrase-end)) | 1437 ;; Nuke name if it's just one word. |
1212 (narrow-to-region (1+ quote-beg) (1- quote-end)) | 1438 (goto-char (point-min)) |
1213 (mail-extr-undo-backslash-quoting (point-min) (point-max))) | 1439 (and mail-extr-ignore-single-names |
1214 | 1440 (not (re-search-forward "[- ]" nil t)) |
1215 ;; Example: First Last <fml@foo.bar.dom> | 1441 (narrow-to-region (point) (point))) |
1216 (phrase-beg | 1442 |
1217 (narrow-to-region phrase-beg phrase-end)) | 1443 ;; Record the result |
1218 | 1444 (setq value-list |
1219 ;; Example: fml@foo.bar.dom (First M. Last) | 1445 (cons (list (if (not (= (point-min) (point-max))) |
1220 (cbeg | 1446 (buffer-string)) |
1221 (narrow-to-region (1+ cbeg) (1- cend)) | 1447 (save-excursion |
1222 (mail-extr-undo-backslash-quoting (point-min) (point-max)) | 1448 (set-buffer canonicalization-buffer) |
1223 | 1449 (if (not (= (point-min) (point-max))) |
1224 ;; Deal with spacing problems | 1450 (buffer-string)))) |
1225 (goto-char (point-min)) | 1451 value-list)) |
1226 ; (cond ((not (search-forward " " nil t)) | 1452 |
1227 ; (goto-char (point-min)) | 1453 ;; Unless one address is all we wanted, |
1228 ; (cond ((search-forward "_" nil t) | 1454 ;; delete this one from extraction-buffer |
1229 ; ;; Handle the *idiotic* use of underlines as spaces. | 1455 ;; and get ready to extract the next address. |
1230 ; ;; Example: fml@foo.bar.dom (First_M._Last) | 1456 (when all |
1231 ; (goto-char (point-min)) | 1457 (if end-of-address |
1232 ; (while (search-forward "_" nil t) | 1458 (narrow-to-region 1 end-of-address) |
1233 ; (replace-match " " t))) | 1459 (widen)) |
1234 ; ((search-forward "." nil t) | 1460 (delete-region (point-min) (point-max)) |
1235 ; ;; Fix . used as space | 1461 (widen)) |
1236 ; ;; Example: danj1@cb.att.com (daniel.jacobson) | 1462 ))) |
1237 ; (goto-char (point-min)) | 1463 (if all (nreverse value-list) (car value-list)) |
1238 ; (while (re-search-forward mail-extr-bad-dot-pattern nil t) | 1464 )) |
1239 ; (replace-match "\\1 \\2" t)))))) | |
1240 ) | |
1241 | |
1242 ;; Otherwise we try to get the name from the mailbox portion | |
1243 ;; of the address. | |
1244 ;; Example: First_M_Last@foo.bar.dom | |
1245 (t | |
1246 ;; *** Work in canon buffer instead? No, can't. Hmm. | |
1247 (goto-char (point-max)) | |
1248 (narrow-to-region (point) (point)) | |
1249 (insert-buffer-substring canonicalization-buffer | |
1250 mbox-beg mbox-end) | |
1251 (goto-char (point-min)) | |
1252 | |
1253 ;; Example: First_Last.XXX@foo.bar.dom | |
1254 (setq \.-ends-name (re-search-forward "[_0-9]" nil t)) | |
1255 | |
1256 (goto-char (point-min)) | |
1257 | |
1258 (if (not mail-extr-mangle-uucp) | |
1259 (modify-syntax-entry ?! "w" (syntax-table))) | |
1260 | |
1261 (while (progn | |
1262 (mail-extr-skip-whitespace-forward) | |
1263 (not (eobp))) | |
1264 (setq char (char-after (point))) | |
1265 (cond | |
1266 ((eq char ?\") | |
1267 (setq quote-beg (point)) | |
1268 (or (mail-extr-safe-move-sexp 1) | |
1269 ;; TODO: handle this error condition!!!!! | |
1270 (forward-char 1)) | |
1271 ;; take into account deletions | |
1272 (setq quote-end (- (point) 2)) | |
1273 (save-excursion | |
1274 (backward-char 1) | |
1275 (mail-extr-delete-char 1) | |
1276 (goto-char quote-beg) | |
1277 (or (eobp) | |
1278 (mail-extr-delete-char 1))) | |
1279 (mail-extr-undo-backslash-quoting quote-beg quote-end) | |
1280 (or (eq ?\ (char-after (point))) | |
1281 (insert " ")) | |
1282 ;; (setq mailbox-name-processed-flag t) | |
1283 (setq \.-ends-name t)) | |
1284 ((eq char ?.) | |
1285 (if (memq (char-after (1+ (point))) '(?_ ?=)) | |
1286 (progn | |
1287 (forward-char 1) | |
1288 (mail-extr-delete-char 1) | |
1289 (insert ?\ )) | |
1290 (if \.-ends-name | |
1291 (narrow-to-region (point-min) (point)) | |
1292 (mail-extr-delete-char 1) | |
1293 (insert " "))) | |
1294 ;; (setq mailbox-name-processed-flag t) | |
1295 ) | |
1296 ((memq (char-syntax char) '(?. ?\\)) | |
1297 (mail-extr-delete-char 1) | |
1298 (insert " ") | |
1299 ;; (setq mailbox-name-processed-flag t) | |
1300 ) | |
1301 (t | |
1302 (setq atom-beg (point)) | |
1303 (forward-word 1) | |
1304 (setq atom-end (point)) | |
1305 (goto-char atom-beg) | |
1306 (save-restriction | |
1307 (narrow-to-region atom-beg atom-end) | |
1308 (cond | |
1309 | |
1310 ;; Handle X.400 addresses encoded in RFC-822. | |
1311 ;; *** Shit! This has to handle the case where it is | |
1312 ;; *** embedded in a quote too! | |
1313 ;; *** Shit! The input is being broken up into atoms | |
1314 ;; *** by periods! | |
1315 ((looking-at mail-extr-x400-encoded-address-pattern) | |
1316 | |
1317 ;; Copy the contents of the individual fields that | |
1318 ;; might hold name data to the beginning. | |
1319 (mapcar | |
1320 (function | |
1321 (lambda (field-pattern) | |
1322 (cond | |
1323 ((save-excursion | |
1324 (re-search-forward field-pattern nil t)) | |
1325 (insert-buffer-substring (current-buffer) | |
1326 (match-beginning 1) | |
1327 (match-end 1)) | |
1328 (insert " "))))) | |
1329 (list mail-extr-x400-encoded-address-given-name-pattern | |
1330 mail-extr-x400-encoded-address-surname-pattern | |
1331 mail-extr-x400-encoded-address-full-name-pattern)) | |
1332 | |
1333 ;; Discard the rest, since it contains stuff like | |
1334 ;; routing information, not part of a name. | |
1335 (mail-extr-skip-whitespace-backward) | |
1336 (delete-region (point) (point-max)) | |
1337 | |
1338 ;; Handle periods used for spacing. | |
1339 (while (re-search-forward mail-extr-bad-dot-pattern nil t) | |
1340 (replace-match "\\1 \\2" t)) | |
1341 | |
1342 ;; (setq mailbox-name-processed-flag t) | |
1343 ) | |
1344 | |
1345 ;; Handle normal addresses. | |
1346 (t | |
1347 (goto-char (point-min)) | |
1348 ;; Handle _ and = used for spacing. | |
1349 (while (re-search-forward "\\([^_=]+\\)[_=]" nil t) | |
1350 (replace-match "\\1 " t) | |
1351 ;; (setq mailbox-name-processed-flag t) | |
1352 ) | |
1353 (goto-char (point-max)))))))) | |
1354 | |
1355 ;; undo the dirty deed | |
1356 (if (not mail-extr-mangle-uucp) | |
1357 (modify-syntax-entry ?! "." (syntax-table))) | |
1358 ;; | |
1359 ;; If we derived the name from the mailbox part of the address, | |
1360 ;; and we only got one word out of it, don't treat that as a | |
1361 ;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar") | |
1362 ;; (if (not mailbox-name-processed-flag) | |
1363 ;; (delete-region (point-min) (point-max))) | |
1364 )) | |
1365 | |
1366 (set-syntax-table mail-extr-address-text-syntax-table) | |
1367 | |
1368 (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer) | |
1369 (goto-char (point-min)) | |
1370 | |
1371 ;; If name is "First Last" and userid is "F?L", then assume | |
1372 ;; the middle initial is the second letter in the userid. | |
1373 ;; Initial code by Jamie Zawinski <jwz@lucid.com> | |
1374 ;; *** Make it work when there's a suffix as well. | |
1375 (goto-char (point-min)) | |
1376 (cond ((and mail-extr-guess-middle-initial | |
1377 (not disable-initial-guessing-flag) | |
1378 (eq 3 (- mbox-end mbox-beg)) | |
1379 (progn | |
1380 (goto-char (point-min)) | |
1381 (looking-at mail-extr-two-name-pattern))) | |
1382 (setq fi (char-after (match-beginning 0)) | |
1383 li (char-after (match-beginning 3))) | |
1384 (save-excursion | |
1385 (set-buffer canonicalization-buffer) | |
1386 ;; char-equal is ignoring case here, so no need to upcase | |
1387 ;; or downcase. | |
1388 (let ((case-fold-search t)) | |
1389 (and (char-equal fi (char-after mbox-beg)) | |
1390 (char-equal li (char-after (1- mbox-end))) | |
1391 (setq mi (char-after (1+ mbox-beg)))))) | |
1392 (cond ((and mi | |
1393 ;; TODO: use better table than syntax table | |
1394 (eq ?w (char-syntax mi))) | |
1395 (goto-char (match-beginning 3)) | |
1396 (insert (upcase mi) ". "))))) | |
1397 | |
1398 ;; Nuke name if it is the same as mailbox name. | |
1399 (let ((buffer-length (- (point-max) (point-min))) | |
1400 (i 0) | |
1401 (names-match-flag t)) | |
1402 (cond ((and (> buffer-length 0) | |
1403 (eq buffer-length (- mbox-end mbox-beg))) | |
1404 (goto-char (point-max)) | |
1405 (insert-buffer-substring canonicalization-buffer | |
1406 mbox-beg mbox-end) | |
1407 (while (and names-match-flag | |
1408 (< i buffer-length)) | |
1409 (or (eq (downcase (char-after (+ i (point-min)))) | |
1410 (downcase | |
1411 (char-after (+ i buffer-length (point-min))))) | |
1412 (setq names-match-flag nil)) | |
1413 (setq i (1+ i))) | |
1414 (delete-region (+ (point-min) buffer-length) (point-max)) | |
1415 (if names-match-flag | |
1416 (narrow-to-region (point) (point)))))) | |
1417 | |
1418 ;; Nuke name if it's just one word. | |
1419 (goto-char (point-min)) | |
1420 (and mail-extr-ignore-single-names | |
1421 (not (re-search-forward "[- ]" nil t)) | |
1422 (narrow-to-region (point) (point))) | |
1423 | |
1424 ;; Result | |
1425 (list (if (not (= (point-min) (point-max))) | |
1426 (buffer-string)) | |
1427 (progn | |
1428 (set-buffer canonicalization-buffer) | |
1429 (if (not (= (point-min) (point-max))) | |
1430 (buffer-string)))) | |
1431 ))) | |
1432 | 1465 |
1433 (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer) | 1466 (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer) |
1434 (let ((word-count 0) | 1467 (let ((word-count 0) |
1435 (case-fold-search nil) | 1468 (case-fold-search nil) |
1436 mixed-case-flag lower-case-flag ;;upper-case-flag | 1469 mixed-case-flag lower-case-flag ;;upper-case-flag |