comparison lisp/mail/mail-extr.el @ 20285:3b0ad3d46bde

(mail-extract-address-components): New arg ALL says return info about all the addresses. Clarify buffer switching logic using save-excursion.
author Karl Heuer <kwzh@gnu.org>
date Thu, 20 Nov 1997 21:45:59 +0000
parents 5458685d9a67
children fd74979309d3
comparison
equal deleted inserted replaced
20284:ff0f79a7b8b6 20285:3b0ad3d46bde
1 ;;; mail-extr.el --- extract full name and address from RFC 822 mail header. 1 ;;; mail-extr.el --- extract full name and address from RFC 822 mail header.
2 2
3 ;; Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc. 3 ;; Copyright (C) 1991, 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
4 4
5 ;; Author: Joe Wells <jbw@cs.bu.edu> 5 ;; Author: Joe Wells <jbw@cs.bu.edu>
6 ;; Maintainer: Jamie Zawinski <jwz@lucid.com> 6 ;; Maintainer: FSF
7 ;; Version: 1.8
8 ;; Keywords: mail 7 ;; Keywords: mail
9 8
10 ;; This file is part of GNU Emacs. 9 ;; This file is part of GNU Emacs.
11 10
12 ;; GNU Emacs is free software; you can redistribute it and/or modify 11 ;; GNU Emacs is free software; you can redistribute it and/or modify
26 25
27 ;;; Commentary: 26 ;;; Commentary:
28 27
29 ;; The entry point of this code is 28 ;; The entry point of this code is
30 ;; 29 ;;
31 ;; mail-extract-address-components: (address) 30 ;; mail-extract-address-components: (address &optional all)
32 ;; 31 ;;
33 ;; Given an RFC-822 ADDRESS, extract full name and canonical address. 32 ;; Given an RFC-822 ADDRESS, extract full name and canonical address.
34 ;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). 33 ;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
35 ;; If no name can be extracted, FULL-NAME will be nil. 34 ;; If no name can be extracted, FULL-NAME will be nil.
36 ;; ADDRESS may be a string or a buffer. If it is a buffer, the visible 35 ;; ADDRESS may be a string or a buffer. If it is a buffer, the visible
37 ;; (narrowed) portion of the buffer will be interpreted as the address. 36 ;; (narrowed) portion of the buffer will be interpreted as the address.
38 ;; (This feature exists so that the clever caller might be able to avoid 37 ;; (This feature exists so that the clever caller might be able to avoid
39 ;; consing a string.) 38 ;; consing a string.)
40 ;; If ADDRESS contains more than one RFC-822 address, only the first is 39 ;; If ADDRESS contains more than one RFC-822 address, only the first is
41 ;; returned. 40 ;; returned.
41 ;;
42 ;; If ALL is non-nil, that means return info about all the addresses
43 ;; that are found in ADDRESS. The value is a list of elements of
44 ;; the form (FULL-NAME CANONICAL-ADDRESS), one per address.
42 ;; 45 ;;
43 ;; This code is more correct (and more heuristic) parser than the code in 46 ;; This code is more correct (and more heuristic) parser than the code in
44 ;; rfc822.el. And despite its size, it's fairly fast. 47 ;; rfc822.el. And despite its size, it's fairly fast.
45 ;; 48 ;;
46 ;; There are two main benefits: 49 ;; There are two main benefits:
704 (defvar disable-initial-guessing-flag) ; dynamic assignment 707 (defvar disable-initial-guessing-flag) ; dynamic assignment
705 (defvar cbeg) ; dynamic assignment 708 (defvar cbeg) ; dynamic assignment
706 (defvar cend) ; dynamic assignment 709 (defvar cend) ; dynamic assignment
707 710
708 ;;;###autoload 711 ;;;###autoload
709 (defun mail-extract-address-components (address) 712 (defun mail-extract-address-components (address &optional all)
710 "Given an RFC-822 ADDRESS, extract full name and canonical address. 713 "Given an RFC-822 address ADDRESS, extract full name and canonical address.
711 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). 714 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
712 If no name can be extracted, FULL-NAME will be nil. 715 If no name can be extracted, FULL-NAME will be nil.
716
717 If the optional argument ALL is non-nil, then ADDRESS can contain zero
718 or more recipients, separated by commas, and we return a list of
719 the form ((FULL-NAME CANONICAL-ADDRESS) ...) with one element for
720 each recipient. If ALL is nil, then if ADDRESS contains more than
721 one recipients, all but the first is ignored.
722
713 ADDRESS may be a string or a buffer. If it is a buffer, the visible 723 ADDRESS may be a string or a buffer. If it is a buffer, the visible
714 (narrowed) portion of the buffer will be interpreted as the address. 724 (narrowed) portion of the buffer will be interpreted as the address.
715 (This feature exists so that the clever caller might be able to avoid 725 (This feature exists so that the clever caller might be able to avoid
716 consing a string.) 726 consing a string.)"
717 If ADDRESS contains more than one RFC-822 address, only the first is
718 returned. Some day this function may be extended to extract multiple
719 addresses, or perhaps return the position at which parsing stopped."
720 (let ((canonicalization-buffer (get-buffer-create " *canonical address*")) 727 (let ((canonicalization-buffer (get-buffer-create " *canonical address*"))
721 (extraction-buffer (get-buffer-create " *extract address components*")) 728 (extraction-buffer (get-buffer-create " *extract address components*"))
722 char 729 value-list)
723 ;; multiple-addresses 730
724 <-pos >-pos @-pos :-pos comma-pos !-pos %-pos \;-pos
725 group-:-pos group-\;-pos route-addr-:-pos
726 record-pos-symbol
727 first-real-pos last-real-pos
728 phrase-beg phrase-end
729 cbeg cend ; dynamically set from -voodoo
730 quote-beg quote-end
731 atom-beg atom-end
732 mbox-beg mbox-end
733 \.-ends-name
734 temp
735 ;; name-suffix
736 fi mi li ; first, middle, last initial
737 saved-%-pos saved-!-pos saved-@-pos
738 domain-pos \.-pos insert-point
739 ;; mailbox-name-processed-flag
740 disable-initial-guessing-flag ; dynamically set from -voodoo
741 )
742
743 (save-excursion 731 (save-excursion
744 (set-buffer extraction-buffer) 732 (set-buffer extraction-buffer)
745 (fundamental-mode) 733 (fundamental-mode)
746 (kill-all-local-variables)
747 (buffer-disable-undo extraction-buffer) 734 (buffer-disable-undo extraction-buffer)
748 (set-syntax-table mail-extr-address-syntax-table) 735 (set-syntax-table mail-extr-address-syntax-table)
749 (widen) 736 (widen)
750 (erase-buffer) 737 (erase-buffer)
751 (setq case-fold-search nil) 738 (setq case-fold-search nil)
761 (insert-buffer-substring address)) 748 (insert-buffer-substring address))
762 (t 749 (t
763 (error "Invalid address: %s" address))) 750 (error "Invalid address: %s" address)))
764 751
765 (set-text-properties (point-min) (point-max) nil) 752 (set-text-properties (point-min) (point-max) nil)
753
754 (save-excursion
755 (set-buffer canonicalization-buffer)
756 (fundamental-mode)
757 (buffer-disable-undo canonicalization-buffer)
758 (set-syntax-table mail-extr-address-syntax-table)
759 (setq case-fold-search nil))
760
766 761
767 ;; stolen from rfc822.el
768 ;; Unfold multiple lines. 762 ;; Unfold multiple lines.
769 (goto-char (point-min)) 763 (goto-char (point-min))
770 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) 764 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
771 (replace-match "\\1 " t)) 765 (replace-match "\\1 " t))
772 766
773 ;; first pass grabs useful information about address 767 ;; Loop over addresses until we have as many as we want.
774 (goto-char (point-min)) 768 (while (and (or all (null value-list))
775 (while (progn 769 (progn (goto-char (point-min))
776 (mail-extr-skip-whitespace-forward) 770 (skip-chars-forward " \t")
777 (not (eobp))) 771 (not (eobp))))
778 (setq char (char-after (point))) 772 (let (char
779 (or first-real-pos 773 end-of-address
780 (if (not (eq char ?\()) 774 <-pos >-pos @-pos :-pos comma-pos !-pos %-pos \;-pos
781 (setq first-real-pos (point)))) 775 group-:-pos group-\;-pos route-addr-:-pos
782 (cond 776 record-pos-symbol
783 ;; comment 777 first-real-pos last-real-pos
784 ((eq char ?\() 778 phrase-beg phrase-end
785 (set-syntax-table mail-extr-address-comment-syntax-table) 779 cbeg cend ; dynamically set from -voodoo
786 ;; only record the first non-empty comment's position 780 quote-beg quote-end
787 (if (and (not cbeg) 781 atom-beg atom-end
782 mbox-beg mbox-end
783 \.-ends-name
784 temp
785 ;; name-suffix
786 fi mi li ; first, middle, last initial
787 saved-%-pos saved-!-pos saved-@-pos
788 domain-pos \.-pos insert-point
789 ;; mailbox-name-processed-flag
790 disable-initial-guessing-flag) ; dynamically set from -voodoo
791
792 (goto-char (point-min))
793
794 ;; Insert extra space at beginning to allow later replacement with <
795 ;; without having to move markers.
796 (or (eq (following-char) ?\ )
797 (insert ?\ ))
798
799 ;; First pass grabs useful information about address.
800 (while (progn
801 (mail-extr-skip-whitespace-forward)
802 (not (eobp)))
803 (setq char (char-after (point)))
804 (or first-real-pos
805 (if (not (eq char ?\())
806 (setq first-real-pos (point))))
807 (cond
808 ;; comment
809 ((eq char ?\()
810 (set-syntax-table mail-extr-address-comment-syntax-table)
811 ;; only record the first non-empty comment's position
812 (if (and (not cbeg)
813 (save-excursion
814 (forward-char 1)
815 (mail-extr-skip-whitespace-forward)
816 (not (eq ?\) (char-after (point))))))
817 (setq cbeg (point)))
818 ;; TODO: don't record if unbalanced
819 (or (mail-extr-safe-move-sexp 1)
820 (forward-char 1))
821 (set-syntax-table mail-extr-address-syntax-table)
822 (if (and cbeg
823 (not cend))
824 (setq cend (point))))
825 ;; quoted text
826 ((eq char ?\")
827 ;; only record the first non-empty quote's position
828 (if (and (not quote-beg)
829 (save-excursion
830 (forward-char 1)
831 (mail-extr-skip-whitespace-forward)
832 (not (eq ?\" (char-after (point))))))
833 (setq quote-beg (point)))
834 ;; TODO: don't record if unbalanced
835 (or (mail-extr-safe-move-sexp 1)
836 (forward-char 1))
837 (if (and quote-beg
838 (not quote-end))
839 (setq quote-end (point))))
840 ;; domain literals
841 ((eq char ?\[)
842 (set-syntax-table mail-extr-address-domain-literal-syntax-table)
843 (or (mail-extr-safe-move-sexp 1)
844 (forward-char 1))
845 (set-syntax-table mail-extr-address-syntax-table))
846 ;; commas delimit addresses when outside < > pairs.
847 ((and (eq char ?,)
848 (or (and (null <-pos)
849 ;; Handle ROUTE-ADDR address that is missing its <.
850 (not (eq ?@ (char-after (1+ (point))))))
851 (and >-pos
852 ;; handle weird munged addresses
853 ;; BUG FIX: This test was reversed. Thanks to the
854 ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au>
855 ;; for discovering this!
856 (< (mail-extr-last <-pos) (car >-pos)))))
857 ;; The argument contains more than one address.
858 ;; Temporarily hide everything after this one.
859 (setq end-of-address (copy-marker (1+ (point))))
860 (narrow-to-region (point-min) (1+ (point)))
861 (mail-extr-delete-char 1)
862 (setq char ?\() ; HAVE I NO SHAME??
863 )
864 ;; record the position of various interesting chars, determine
865 ;; legality later.
866 ((setq record-pos-symbol
867 (cdr (assq char
868 '((?< . <-pos) (?> . >-pos) (?@ . @-pos)
869 (?: . :-pos) (?, . comma-pos) (?! . !-pos)
870 (?% . %-pos) (?\; . \;-pos)))))
871 (set record-pos-symbol
872 (cons (point) (symbol-value record-pos-symbol)))
873 (forward-char 1))
874 ((eq char ?.)
875 (forward-char 1))
876 ((memq char '(
877 ;; comment terminator illegal
878 ?\)
879 ;; domain literal terminator illegal
880 ?\]
881 ;; \ allowed only within quoted strings,
882 ;; domain literals, and comments
883 ?\\
884 ))
885 (mail-extr-nuke-char-at (point))
886 (forward-char 1))
887 (t
888 (forward-word 1)))
889 (or (eq char ?\()
890 ;; At the end of first address of a multiple address header.
891 (and (eq char ?,)
892 (eobp))
893 (setq last-real-pos (point))))
894
895 ;; Use only the leftmost <, if any. Replace all others with spaces.
896 (while (cdr <-pos)
897 (mail-extr-nuke-char-at (car <-pos))
898 (setq <-pos (cdr <-pos)))
899
900 ;; Use only the rightmost >, if any. Replace all others with spaces.
901 (while (cdr >-pos)
902 (mail-extr-nuke-char-at (nth 1 >-pos))
903 (setcdr >-pos (nthcdr 2 >-pos)))
904
905 ;; If multiple @s and a :, but no < and >, insert around buffer.
906 ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc
907 ;; This commonly happens on the UUCP "From " line. Ugh.
908 (cond ((and (> (length @-pos) 1)
909 (eq 1 (length :-pos)) ;TODO: check if between last two @s
910 (not \;-pos)
911 (not <-pos))
912 (goto-char (point-min))
913 (mail-extr-delete-char 1)
914 (setq <-pos (list (point)))
915 (insert ?<)))
916
917 ;; If < but no >, insert > in rightmost possible position
918 (cond ((and <-pos
919 (null >-pos))
920 (goto-char (point-max))
921 (setq >-pos (list (point)))
922 (insert ?>)))
923
924 ;; If > but no <, replace > with space.
925 (cond ((and >-pos
926 (null <-pos))
927 (mail-extr-nuke-char-at (car >-pos))
928 (setq >-pos nil)))
929
930 ;; Turn >-pos and <-pos into non-lists
931 (setq >-pos (car >-pos)
932 <-pos (car <-pos))
933
934 ;; Trim other punctuation lists of items outside < > pair to handle
935 ;; stupid MTAs.
936 (cond (<-pos ; don't need to check >-pos also
937 ;; handle bozo software that violates RFC 822 by sticking
938 ;; punctuation marks outside of a < > pair
939 (mail-extr-nuke-outside-range @-pos <-pos >-pos t)
940 ;; RFC 822 says nothing about these two outside < >, but
941 ;; remove those positions from the lists to make things
942 ;; easier.
943 (mail-extr-nuke-outside-range !-pos <-pos >-pos t)
944 (mail-extr-nuke-outside-range %-pos <-pos >-pos t)))
945
946 ;; Check for : that indicates GROUP list and for : part of
947 ;; ROUTE-ADDR spec.
948 ;; Can't possibly be more than two :. Nuke any extra.
949 (while :-pos
950 (setq temp (car :-pos)
951 :-pos (cdr :-pos))
952 (cond ((and <-pos >-pos
953 (> temp <-pos)
954 (< temp >-pos))
955 (if (or route-addr-:-pos
956 (< (length @-pos) 2)
957 (> temp (car @-pos))
958 (< temp (nth 1 @-pos)))
959 (mail-extr-nuke-char-at temp)
960 (setq route-addr-:-pos temp)))
961 ((or (not <-pos)
962 (and <-pos
963 (< temp <-pos)))
964 (setq group-:-pos temp))))
965
966 ;; Nuke any ; that is in or to the left of a < > pair or to the left
967 ;; of a GROUP starting :. Also, there may only be one ;.
968 (while \;-pos
969 (setq temp (car \;-pos)
970 \;-pos (cdr \;-pos))
971 (cond ((and <-pos >-pos
972 (> temp <-pos)
973 (< temp >-pos))
974 (mail-extr-nuke-char-at temp))
975 ((and (or (not group-:-pos)
976 (> temp group-:-pos))
977 (not group-\;-pos))
978 (setq group-\;-pos temp))))
979
980 ;; Nuke unmatched GROUP syntax characters.
981 (cond ((and group-:-pos (not group-\;-pos))
982 ;; *** Do I really need to erase it?
983 (mail-extr-nuke-char-at group-:-pos)
984 (setq group-:-pos nil)))
985 (cond ((and group-\;-pos (not group-:-pos))
986 ;; *** Do I really need to erase it?
987 (mail-extr-nuke-char-at group-\;-pos)
988 (setq group-\;-pos nil)))
989
990 ;; Handle junk like ";@host.company.dom" that sendmail adds.
991 ;; **** should I remember comment positions?
992 (cond
993 (group-\;-pos
994 ;; this is fine for now
995 (mail-extr-nuke-outside-range !-pos group-:-pos group-\;-pos t)
996 (mail-extr-nuke-outside-range @-pos group-:-pos group-\;-pos t)
997 (mail-extr-nuke-outside-range %-pos group-:-pos group-\;-pos t)
998 (mail-extr-nuke-outside-range comma-pos group-:-pos group-\;-pos t)
999 (and last-real-pos
1000 (> last-real-pos (1+ group-\;-pos))
1001 (setq last-real-pos (1+ group-\;-pos)))
1002 ;; *** This may be wrong:
1003 (and cend
1004 (> cend group-\;-pos)
1005 (setq cend nil
1006 cbeg nil))
1007 (and quote-end
1008 (> quote-end group-\;-pos)
1009 (setq quote-end nil
1010 quote-beg nil))
1011 ;; This was both wrong and unnecessary:
1012 ;;(narrow-to-region (point-min) group-\;-pos)
1013
1014 ;; *** The entire handling of GROUP addresses seems rather lame.
1015 ;; *** It deserves a complete rethink, except that these addresses
1016 ;; *** are hardly ever seen.
1017 ))
1018
1019 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
1020 ;; others.
1021 ;; Hell, go ahead an nuke all of the commas.
1022 ;; **** This will cause problems when we start handling commas in
1023 ;; the PHRASE part .... no it won't ... yes it will ... ?????
1024 (mail-extr-nuke-outside-range comma-pos 1 1)
1025
1026 ;; can only have multiple @s inside < >. The fact that some MTAs
1027 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
1028 ;; handled above.
1029
1030 ;; Locate PHRASE part of ROUTE-ADDR.
1031 (cond (<-pos
1032 (goto-char <-pos)
1033 (mail-extr-skip-whitespace-backward)
1034 (setq phrase-end (point))
1035 (goto-char (or ;;group-:-pos
1036 (point-min)))
1037 (mail-extr-skip-whitespace-forward)
1038 (if (< (point) phrase-end)
1039 (setq phrase-beg (point))
1040 (setq phrase-end nil))))
1041
1042 ;; handle ROUTE-ADDRS with real ROUTEs.
1043 ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and
1044 ;; any % or ! must be semantically meaningless.
1045 ;; TODO: do this processing into canonicalization buffer
1046 (cond (route-addr-:-pos
1047 (setq !-pos nil
1048 %-pos nil
1049 >-pos (copy-marker >-pos)
1050 route-addr-:-pos (copy-marker route-addr-:-pos))
1051 (goto-char >-pos)
1052 (insert-before-markers ?X)
1053 (goto-char (car @-pos))
1054 (while (setq @-pos (cdr @-pos))
1055 (mail-extr-delete-char 1)
1056 (setq %-pos (cons (point-marker) %-pos))
1057 (insert "%")
1058 (goto-char (1- >-pos))
788 (save-excursion 1059 (save-excursion
789 (forward-char 1) 1060 (insert-buffer-substring extraction-buffer
790 (mail-extr-skip-whitespace-forward) 1061 (car @-pos) route-addr-:-pos)
791 (not (eq ?\) (char-after (point)))))) 1062 (delete-region (car @-pos) route-addr-:-pos))
792 (setq cbeg (point))) 1063 (or (cdr @-pos)
793 ;; TODO: don't record if unbalanced 1064 (setq saved-@-pos (list (point)))))
794 (or (mail-extr-safe-move-sexp 1) 1065 (setq @-pos saved-@-pos)
795 (forward-char 1)) 1066 (goto-char >-pos)
796 (set-syntax-table mail-extr-address-syntax-table) 1067 (mail-extr-delete-char -1)
797 (if (and cbeg 1068 (mail-extr-nuke-char-at route-addr-:-pos)
798 (not cend)) 1069 (mail-extr-demarkerize route-addr-:-pos)
799 (setq cend (point)))) 1070 (setq route-addr-:-pos nil
800 ;; quoted text 1071 >-pos (mail-extr-demarkerize >-pos)
801 ((eq char ?\") 1072 %-pos (mapcar 'mail-extr-demarkerize %-pos))))
802 ;; only record the first non-empty quote's position 1073
803 (if (and (not quote-beg) 1074 ;; de-listify @-pos
804 (save-excursion 1075 (setq @-pos (car @-pos))
805 (forward-char 1) 1076
806 (mail-extr-skip-whitespace-forward) 1077 ;; TODO: remove comments in the middle of an address
807 (not (eq ?\" (char-after (point)))))) 1078
808 (setq quote-beg (point))) 1079 (save-excursion
809 ;; TODO: don't record if unbalanced 1080 (set-buffer canonicalization-buffer)
810 (or (mail-extr-safe-move-sexp 1) 1081
811 (forward-char 1)) 1082 (widen)
812 (if (and quote-beg 1083 (erase-buffer)
813 (not quote-end)) 1084 (insert-buffer-substring extraction-buffer)
814 (setq quote-end (point)))) 1085
815 ;; domain literals 1086 (if <-pos
816 ((eq char ?\[) 1087 (narrow-to-region (progn
817 (set-syntax-table mail-extr-address-domain-literal-syntax-table) 1088 (goto-char (1+ <-pos))
818 (or (mail-extr-safe-move-sexp 1) 1089 (mail-extr-skip-whitespace-forward)
819 (forward-char 1)) 1090 (point))
820 (set-syntax-table mail-extr-address-syntax-table)) 1091 >-pos)
821 ;; commas delimit addresses when outside < > pairs. 1092 (if (and first-real-pos last-real-pos)
822 ((and (eq char ?,) 1093 (narrow-to-region first-real-pos last-real-pos)
823 (or (and (null <-pos) 1094 ;; ****** Oh no! What if the address is completely empty!
824 ;; Handle ROUTE-ADDR address that is missing its <. 1095 ;; *** Is this correct?
825 (not (eq ?@ (char-after (1+ (point)))))) 1096 (narrow-to-region (point-max) (point-max))
826 (and >-pos 1097 ))
827 ;; handle weird munged addresses 1098
828 ;; BUG FIX: This test was reversed. Thanks to the 1099 (and @-pos %-pos
829 ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au> 1100 (mail-extr-nuke-outside-range %-pos (point-min) @-pos))
830 ;; for discovering this! 1101 (and %-pos !-pos
831 (< (mail-extr-last <-pos) (car >-pos))))) 1102 (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos)))
832 ;; It'd be great if some day this worked, but for now, punt. 1103 (and @-pos !-pos (not %-pos)
833 ;; (setq multiple-addresses t) 1104 (mail-extr-nuke-outside-range !-pos (point-min) @-pos))
834 ;; ;; *** Why do I want this: 1105
835 ;; (mail-extr-delete-char 1) 1106 ;; Error condition:?? (and %-pos (not @-pos))
836 ;; (narrow-to-region (point-min) (point)) 1107
837 (delete-region (point) (point-max)) 1108 ;; WARNING: THIS CODE IS DUPLICATED BELOW.
838 (setq char ?\() ; HAVE I NO SHAME?? 1109 (cond ((and %-pos
839 ) 1110 (not @-pos))
840 ;; record the position of various interesting chars, determine 1111 (goto-char (car %-pos))
841 ;; legality later. 1112 (mail-extr-delete-char 1)
842 ((setq record-pos-symbol 1113 (setq @-pos (point))
843 (cdr (assq char 1114 (insert "@")
844 '((?< . <-pos) (?> . >-pos) (?@ . @-pos) 1115 (setq %-pos (cdr %-pos))))
845 (?: . :-pos) (?, . comma-pos) (?! . !-pos) 1116
846 (?% . %-pos) (?\; . \;-pos))))) 1117 (if mail-extr-mangle-uucp
847 (set record-pos-symbol 1118 (cond (!-pos
848 (cons (point) (symbol-value record-pos-symbol))) 1119 ;; **** I don't understand this save-restriction and the
849 (forward-char 1)) 1120 ;; narrow-to-region inside it. Why did I do that?
850 ((eq char ?.) 1121 (save-restriction
851 (forward-char 1)) 1122 (cond ((and @-pos
852 ((memq char '( 1123 mail-extr-@-binds-tighter-than-!)
853 ;; comment terminator illegal 1124 (goto-char @-pos)
854 ?\) 1125 (setq %-pos (cons (point) %-pos)
855 ;; domain literal terminator illegal 1126 @-pos nil)
856 ?\] 1127 (mail-extr-delete-char 1)
857 ;; \ allowed only within quoted strings, 1128 (insert "%")
858 ;; domain literals, and comments 1129 (setq insert-point (point-max)))
859 ?\\ 1130 (mail-extr-@-binds-tighter-than-!
860 )) 1131 (setq insert-point (point-max)))
861 (mail-extr-nuke-char-at (point)) 1132 (%-pos
862 (forward-char 1)) 1133 (setq insert-point (mail-extr-last %-pos)
863 (t 1134 saved-%-pos (mapcar 'mail-extr-markerize %-pos)
864 (forward-word 1))) 1135 %-pos nil
865 (or (eq char ?\() 1136 @-pos (mail-extr-markerize @-pos)))
866 ;; At the end of first address of a multiple address header. 1137 (@-pos
867 (and (eq char ?,) 1138 (setq insert-point @-pos)
868 (eobp)) 1139 (setq @-pos (mail-extr-markerize @-pos)))
869 (setq last-real-pos (point)))) 1140 (t
870 1141 (setq insert-point (point-max))))
871 ;; Use only the leftmost <, if any. Replace all others with spaces. 1142 (narrow-to-region (point-min) insert-point)
872 (while (cdr <-pos) 1143 (setq saved-!-pos (car !-pos))
873 (mail-extr-nuke-char-at (car <-pos)) 1144 (while !-pos
874 (setq <-pos (cdr <-pos))) 1145 (goto-char (point-max))
875 1146 (cond ((and (not @-pos)
876 ;; Use only the rightmost >, if any. Replace all others with spaces. 1147 (not (cdr !-pos)))
877 (while (cdr >-pos) 1148 (setq @-pos (point))
878 (mail-extr-nuke-char-at (nth 1 >-pos)) 1149 (insert-before-markers "@ "))
879 (setcdr >-pos (nthcdr 2 >-pos))) 1150 (t
880 1151 (setq %-pos (cons (point) %-pos))
881 ;; If multiple @s and a :, but no < and >, insert around buffer. 1152 (insert-before-markers "% ")))
882 ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc 1153 (backward-char 1)
883 ;; This commonly happens on the UUCP "From " line. Ugh. 1154 (insert-buffer-substring
884 (cond ((and (> (length @-pos) 1) 1155 (current-buffer)
885 (eq 1 (length :-pos)) ;TODO: check if between last two @s 1156 (if (nth 1 !-pos)
886 (not \;-pos) 1157 (1+ (nth 1 !-pos))
887 (not <-pos)) 1158 (point-min))
888 (goto-char (point-min)) 1159 (car !-pos))
889 (mail-extr-delete-char 1) 1160 (mail-extr-delete-char 1)
890 (setq <-pos (list (point))) 1161 (or (save-excursion
891 (insert ?<))) 1162 (mail-extr-safe-move-sexp -1)
892 1163 (mail-extr-skip-whitespace-backward)
893 ;; If < but no >, insert > in rightmost possible position 1164 (eq ?. (preceding-char)))
894 (cond ((and <-pos 1165 (insert-before-markers
895 (null >-pos)) 1166 (if (save-excursion
896 (goto-char (point-max)) 1167 (mail-extr-skip-whitespace-backward)
897 (setq >-pos (list (point))) 1168 (eq ?. (preceding-char)))
898 (insert ?>))) 1169 ""
899 1170 ".")
900 ;; If > but no <, replace > with space. 1171 "uucp"))
901 (cond ((and >-pos 1172 (setq !-pos (cdr !-pos))))
902 (null <-pos)) 1173 (and saved-%-pos
903 (mail-extr-nuke-char-at (car >-pos)) 1174 (setq %-pos (append (mapcar 'mail-extr-demarkerize
904 (setq >-pos nil))) 1175 saved-%-pos)
905 1176 %-pos)))
906 ;; Turn >-pos and <-pos into non-lists 1177 (setq @-pos (mail-extr-demarkerize @-pos))
907 (setq >-pos (car >-pos) 1178 (narrow-to-region (1+ saved-!-pos) (point-max)))))
908 <-pos (car <-pos)) 1179
909 1180 ;; WARNING: THIS CODE IS DUPLICATED ABOVE.
910 ;; Trim other punctuation lists of items outside < > pair to handle 1181 (cond ((and %-pos
911 ;; stupid MTAs. 1182 (not @-pos))
912 (cond (<-pos ; don't need to check >-pos also 1183 (goto-char (car %-pos))
913 ;; handle bozo software that violates RFC 822 by sticking 1184 (mail-extr-delete-char 1)
914 ;; punctuation marks outside of a < > pair 1185 (setq @-pos (point))
915 (mail-extr-nuke-outside-range @-pos <-pos >-pos t) 1186 (insert "@")
916 ;; RFC 822 says nothing about these two outside < >, but 1187 (setq %-pos (cdr %-pos))))
917 ;; remove those positions from the lists to make things 1188
918 ;; easier. 1189 (setq %-pos (nreverse %-pos))
919 (mail-extr-nuke-outside-range !-pos <-pos >-pos t) 1190 (cond (%-pos ; implies @-pos valid
920 (mail-extr-nuke-outside-range %-pos <-pos >-pos t))) 1191 (setq temp %-pos)
921 1192 (catch 'truncated
922 ;; Check for : that indicates GROUP list and for : part of 1193 (while temp
923 ;; ROUTE-ADDR spec. 1194 (goto-char (or (nth 1 temp)
924 ;; Can't possibly be more than two :. Nuke any extra. 1195 @-pos))
925 (while :-pos 1196 (mail-extr-skip-whitespace-backward)
926 (setq temp (car :-pos) 1197 (save-excursion
927 :-pos (cdr :-pos)) 1198 (mail-extr-safe-move-sexp -1)
928 (cond ((and <-pos >-pos 1199 (setq domain-pos (point))
929 (> temp <-pos) 1200 (mail-extr-skip-whitespace-backward)
930 (< temp >-pos)) 1201 (setq \.-pos (eq ?. (preceding-char))))
931 (if (or route-addr-:-pos 1202 (cond ((and \.-pos
932 (< (length @-pos) 2) 1203 ;; #### string consing
933 (> temp (car @-pos)) 1204 (let ((s (intern-soft
934 (< temp (nth 1 @-pos))) 1205 (buffer-substring domain-pos (point))
935 (mail-extr-nuke-char-at temp) 1206 mail-extr-all-top-level-domains)))
936 (setq route-addr-:-pos temp))) 1207 (and s (get s 'domain-name))))
937 ((or (not <-pos) 1208 (narrow-to-region (point-min) (point))
938 (and <-pos 1209 (goto-char (car temp))
939 (< temp <-pos))) 1210 (mail-extr-delete-char 1)
940 (setq group-:-pos temp)))) 1211 (setq @-pos (point))
941 1212 (setcdr temp nil)
942 ;; Nuke any ; that is in or to the left of a < > pair or to the left 1213 (setq %-pos (delq @-pos %-pos))
943 ;; of a GROUP starting :. Also, there may only be one ;. 1214 (insert "@")
944 (while \;-pos 1215 (throw 'truncated t)))
945 (setq temp (car \;-pos) 1216 (setq temp (cdr temp))))))
946 \;-pos (cdr \;-pos)) 1217 (setq mbox-beg (point-min)
947 (cond ((and <-pos >-pos 1218 mbox-end (if %-pos (car %-pos)
948 (> temp <-pos) 1219 (or @-pos
949 (< temp >-pos)) 1220 (point-max)))))
950 (mail-extr-nuke-char-at temp)) 1221
951 ((and (or (not group-:-pos) 1222 ;; Done canonicalizing address.
952 (> temp group-:-pos)) 1223 ;; We are now back in extraction-buffer.
953 (not group-\;-pos)) 1224
954 (setq group-\;-pos temp)))) 1225 ;; Decide what part of the address to search to find the full name.
955 1226 (cond (
956 ;; Nuke unmatched GROUP syntax characters. 1227 ;; Example: "First M. Last" <fml@foo.bar.dom>
957 (cond ((and group-:-pos (not group-\;-pos)) 1228 (and phrase-beg
958 ;; *** Do I really need to erase it? 1229 (eq quote-beg phrase-beg)
959 (mail-extr-nuke-char-at group-:-pos) 1230 (<= quote-end phrase-end))
960 (setq group-:-pos nil))) 1231 (narrow-to-region (1+ quote-beg) (1- quote-end))
961 (cond ((and group-\;-pos (not group-:-pos)) 1232 (mail-extr-undo-backslash-quoting (point-min) (point-max)))
962 ;; *** Do I really need to erase it? 1233
963 (mail-extr-nuke-char-at group-\;-pos) 1234 ;; Example: First Last <fml@foo.bar.dom>
964 (setq group-\;-pos nil))) 1235 (phrase-beg
965 1236 (narrow-to-region phrase-beg phrase-end))
966 ;; Handle junk like ";@host.company.dom" that sendmail adds. 1237
967 ;; **** should I remember comment positions? 1238 ;; Example: fml@foo.bar.dom (First M. Last)
968 (cond 1239 (cbeg
969 (group-\;-pos 1240 (narrow-to-region (1+ cbeg) (1- cend))
970 ;; this is fine for now 1241 (mail-extr-undo-backslash-quoting (point-min) (point-max))
971 (mail-extr-nuke-outside-range !-pos group-:-pos group-\;-pos t) 1242
972 (mail-extr-nuke-outside-range @-pos group-:-pos group-\;-pos t) 1243 ;; Deal with spacing problems
973 (mail-extr-nuke-outside-range %-pos group-:-pos group-\;-pos t) 1244 (goto-char (point-min))
974 (mail-extr-nuke-outside-range comma-pos group-:-pos group-\;-pos t) 1245 ;;; (cond ((not (search-forward " " nil t))
975 (and last-real-pos 1246 ;;; (goto-char (point-min))
976 (> last-real-pos (1+ group-\;-pos)) 1247 ;;; (cond ((search-forward "_" nil t)
977 (setq last-real-pos (1+ group-\;-pos))) 1248 ;;; ;; Handle the *idiotic* use of underlines as spaces.
978 ;; *** This may be wrong: 1249 ;;; ;; Example: fml@foo.bar.dom (First_M._Last)
979 (and cend 1250 ;;; (goto-char (point-min))
980 (> cend group-\;-pos) 1251 ;;; (while (search-forward "_" nil t)
981 (setq cend nil 1252 ;;; (replace-match " " t)))
982 cbeg nil)) 1253 ;;; ((search-forward "." nil t)
983 (and quote-end 1254 ;;; ;; Fix . used as space
984 (> quote-end group-\;-pos) 1255 ;;; ;; Example: danj1@cb.att.com (daniel.jacobson)
985 (setq quote-end nil 1256 ;;; (goto-char (point-min))
986 quote-beg nil)) 1257 ;;; (while (re-search-forward mail-extr-bad-dot-pattern nil t)
987 ;; This was both wrong and unnecessary: 1258 ;;; (replace-match "\\1 \\2" t))))))
988 ;;(narrow-to-region (point-min) group-\;-pos) 1259 )
989 1260
990 ;; *** The entire handling of GROUP addresses seems rather lame. 1261 ;; Otherwise we try to get the name from the mailbox portion
991 ;; *** It deserves a complete rethink, except that these addresses 1262 ;; of the address.
992 ;; *** are hardly ever seen. 1263 ;; Example: First_M_Last@foo.bar.dom
993 )) 1264 (t
994 1265 ;; *** Work in canon buffer instead? No, can't. Hmm.
995 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any
996 ;; others.
997 ;; Hell, go ahead an nuke all of the commas.
998 ;; **** This will cause problems when we start handling commas in
999 ;; the PHRASE part .... no it won't ... yes it will ... ?????
1000 (mail-extr-nuke-outside-range comma-pos 1 1)
1001
1002 ;; can only have multiple @s inside < >. The fact that some MTAs
1003 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
1004 ;; handled above.
1005
1006 ;; Locate PHRASE part of ROUTE-ADDR.
1007 (cond (<-pos
1008 (goto-char <-pos)
1009 (mail-extr-skip-whitespace-backward)
1010 (setq phrase-end (point))
1011 (goto-char (or ;;group-:-pos
1012 (point-min)))
1013 (mail-extr-skip-whitespace-forward)
1014 (if (< (point) phrase-end)
1015 (setq phrase-beg (point))
1016 (setq phrase-end nil))))
1017
1018 ;; handle ROUTE-ADDRS with real ROUTEs.
1019 ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and
1020 ;; any % or ! must be semantically meaningless.
1021 ;; TODO: do this processing into canonicalization buffer
1022 (cond (route-addr-:-pos
1023 (setq !-pos nil
1024 %-pos nil
1025 >-pos (copy-marker >-pos)
1026 route-addr-:-pos (copy-marker route-addr-:-pos))
1027 (goto-char >-pos)
1028 (insert-before-markers ?X)
1029 (goto-char (car @-pos))
1030 (while (setq @-pos (cdr @-pos))
1031 (mail-extr-delete-char 1)
1032 (setq %-pos (cons (point-marker) %-pos))
1033 (insert "%")
1034 (goto-char (1- >-pos))
1035 (save-excursion
1036 (insert-buffer-substring extraction-buffer
1037 (car @-pos) route-addr-:-pos)
1038 (delete-region (car @-pos) route-addr-:-pos))
1039 (or (cdr @-pos)
1040 (setq saved-@-pos (list (point)))))
1041 (setq @-pos saved-@-pos)
1042 (goto-char >-pos)
1043 (mail-extr-delete-char -1)
1044 (mail-extr-nuke-char-at route-addr-:-pos)
1045 (mail-extr-demarkerize route-addr-:-pos)
1046 (setq route-addr-:-pos nil
1047 >-pos (mail-extr-demarkerize >-pos)
1048 %-pos (mapcar 'mail-extr-demarkerize %-pos))))
1049
1050 ;; de-listify @-pos
1051 (setq @-pos (car @-pos))
1052
1053 ;; TODO: remove comments in the middle of an address
1054
1055 (set-buffer canonicalization-buffer)
1056 (fundamental-mode)
1057 (kill-all-local-variables)
1058 (buffer-disable-undo canonicalization-buffer)
1059 (set-syntax-table mail-extr-address-syntax-table)
1060 (setq case-fold-search nil)
1061
1062 (widen)
1063 (erase-buffer)
1064 (insert-buffer-substring extraction-buffer)
1065
1066 (if <-pos
1067 (narrow-to-region (progn
1068 (goto-char (1+ <-pos))
1069 (mail-extr-skip-whitespace-forward)
1070 (point))
1071 >-pos)
1072 (if (and first-real-pos last-real-pos)
1073 (narrow-to-region first-real-pos last-real-pos)
1074 ;; ****** Oh no! What if the address is completely empty!
1075 ;; *** Is this correct?
1076 (narrow-to-region (point-max) (point-max))
1077 ))
1078
1079 (and @-pos %-pos
1080 (mail-extr-nuke-outside-range %-pos (point-min) @-pos))
1081 (and %-pos !-pos
1082 (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos)))
1083 (and @-pos !-pos (not %-pos)
1084 (mail-extr-nuke-outside-range !-pos (point-min) @-pos))
1085
1086 ;; Error condition:?? (and %-pos (not @-pos))
1087
1088 ;; WARNING: THIS CODE IS DUPLICATED BELOW.
1089 (cond ((and %-pos
1090 (not @-pos))
1091 (goto-char (car %-pos))
1092 (mail-extr-delete-char 1)
1093 (setq @-pos (point))
1094 (insert "@")
1095 (setq %-pos (cdr %-pos))))
1096
1097 (if mail-extr-mangle-uucp
1098 (cond (!-pos
1099 ;; **** I don't understand this save-restriction and the
1100 ;; narrow-to-region inside it. Why did I do that?
1101 (save-restriction
1102 (cond ((and @-pos
1103 mail-extr-@-binds-tighter-than-!)
1104 (goto-char @-pos)
1105 (setq %-pos (cons (point) %-pos)
1106 @-pos nil)
1107 (mail-extr-delete-char 1)
1108 (insert "%")
1109 (setq insert-point (point-max)))
1110 (mail-extr-@-binds-tighter-than-!
1111 (setq insert-point (point-max)))
1112 (%-pos
1113 (setq insert-point (mail-extr-last %-pos)
1114 saved-%-pos (mapcar 'mail-extr-markerize %-pos)
1115 %-pos nil
1116 @-pos (mail-extr-markerize @-pos)))
1117 (@-pos
1118 (setq insert-point @-pos)
1119 (setq @-pos (mail-extr-markerize @-pos)))
1120 (t
1121 (setq insert-point (point-max))))
1122 (narrow-to-region (point-min) insert-point)
1123 (setq saved-!-pos (car !-pos))
1124 (while !-pos
1125 (goto-char (point-max)) 1266 (goto-char (point-max))
1126 (cond ((and (not @-pos) 1267 (narrow-to-region (point) (point))
1127 (not (cdr !-pos))) 1268 (insert-buffer-substring canonicalization-buffer
1128 (setq @-pos (point)) 1269 mbox-beg mbox-end)
1129 (insert-before-markers "@ ")) 1270 (goto-char (point-min))
1130 (t 1271
1131 (setq %-pos (cons (point) %-pos)) 1272 ;; Example: First_Last.XXX@foo.bar.dom
1132 (insert-before-markers "% "))) 1273 (setq \.-ends-name (re-search-forward "[_0-9]" nil t))
1133 (backward-char 1) 1274
1134 (insert-buffer-substring 1275 (goto-char (point-min))
1135 (current-buffer) 1276
1136 (if (nth 1 !-pos) 1277 (if (not mail-extr-mangle-uucp)
1137 (1+ (nth 1 !-pos)) 1278 (modify-syntax-entry ?! "w" (syntax-table)))
1138 (point-min)) 1279
1139 (car !-pos)) 1280 (while (progn
1140 (mail-extr-delete-char 1) 1281 (mail-extr-skip-whitespace-forward)
1141 (or (save-excursion 1282 (not (eobp)))
1142 (mail-extr-safe-move-sexp -1) 1283 (setq char (char-after (point)))
1143 (mail-extr-skip-whitespace-backward) 1284 (cond
1144 (eq ?. (preceding-char))) 1285 ((eq char ?\")
1145 (insert-before-markers 1286 (setq quote-beg (point))
1146 (if (save-excursion 1287 (or (mail-extr-safe-move-sexp 1)
1147 (mail-extr-skip-whitespace-backward) 1288 ;; TODO: handle this error condition!!!!!
1148 (eq ?. (preceding-char))) 1289 (forward-char 1))
1149 "" 1290 ;; take into account deletions
1150 ".") 1291 (setq quote-end (- (point) 2))
1151 "uucp")) 1292 (save-excursion
1152 (setq !-pos (cdr !-pos)))) 1293 (backward-char 1)
1153 (and saved-%-pos 1294 (mail-extr-delete-char 1)
1154 (setq %-pos (append (mapcar 'mail-extr-demarkerize 1295 (goto-char quote-beg)
1155 saved-%-pos) 1296 (or (eobp)
1156 %-pos))) 1297 (mail-extr-delete-char 1)))
1157 (setq @-pos (mail-extr-demarkerize @-pos)) 1298 (mail-extr-undo-backslash-quoting quote-beg quote-end)
1158 (narrow-to-region (1+ saved-!-pos) (point-max))))) 1299 (or (eq ?\ (char-after (point)))
1159 1300 (insert " "))
1160 ;; WARNING: THIS CODE IS DUPLICATED ABOVE. 1301 ;; (setq mailbox-name-processed-flag t)
1161 (cond ((and %-pos 1302 (setq \.-ends-name t))
1162 (not @-pos)) 1303 ((eq char ?.)
1163 (goto-char (car %-pos)) 1304 (if (memq (char-after (1+ (point))) '(?_ ?=))
1164 (mail-extr-delete-char 1) 1305 (progn
1165 (setq @-pos (point)) 1306 (forward-char 1)
1166 (insert "@") 1307 (mail-extr-delete-char 1)
1167 (setq %-pos (cdr %-pos)))) 1308 (insert ?\ ))
1168 1309 (if \.-ends-name
1169 (setq %-pos (nreverse %-pos)) 1310 (narrow-to-region (point-min) (point))
1170 (cond (%-pos ; implies @-pos valid 1311 (mail-extr-delete-char 1)
1171 (setq temp %-pos) 1312 (insert " ")))
1172 (catch 'truncated 1313 ;; (setq mailbox-name-processed-flag t)
1173 (while temp 1314 )
1174 (goto-char (or (nth 1 temp) 1315 ((memq (char-syntax char) '(?. ?\\))
1175 @-pos)) 1316 (mail-extr-delete-char 1)
1176 (mail-extr-skip-whitespace-backward) 1317 (insert " ")
1318 ;; (setq mailbox-name-processed-flag t)
1319 )
1320 (t
1321 (setq atom-beg (point))
1322 (forward-word 1)
1323 (setq atom-end (point))
1324 (goto-char atom-beg)
1325 (save-restriction
1326 (narrow-to-region atom-beg atom-end)
1327 (cond
1328
1329 ;; Handle X.400 addresses encoded in RFC-822.
1330 ;; *** Shit! This has to handle the case where it is
1331 ;; *** embedded in a quote too!
1332 ;; *** Shit! The input is being broken up into atoms
1333 ;; *** by periods!
1334 ((looking-at mail-extr-x400-encoded-address-pattern)
1335
1336 ;; Copy the contents of the individual fields that
1337 ;; might hold name data to the beginning.
1338 (mapcar
1339 (function
1340 (lambda (field-pattern)
1341 (cond
1342 ((save-excursion
1343 (re-search-forward field-pattern nil t))
1344 (insert-buffer-substring (current-buffer)
1345 (match-beginning 1)
1346 (match-end 1))
1347 (insert " ")))))
1348 (list mail-extr-x400-encoded-address-given-name-pattern
1349 mail-extr-x400-encoded-address-surname-pattern
1350 mail-extr-x400-encoded-address-full-name-pattern))
1351
1352 ;; Discard the rest, since it contains stuff like
1353 ;; routing information, not part of a name.
1354 (mail-extr-skip-whitespace-backward)
1355 (delete-region (point) (point-max))
1356
1357 ;; Handle periods used for spacing.
1358 (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1359 (replace-match "\\1 \\2" t))
1360
1361 ;; (setq mailbox-name-processed-flag t)
1362 )
1363
1364 ;; Handle normal addresses.
1365 (t
1366 (goto-char (point-min))
1367 ;; Handle _ and = used for spacing.
1368 (while (re-search-forward "\\([^_=]+\\)[_=]" nil t)
1369 (replace-match "\\1 " t)
1370 ;; (setq mailbox-name-processed-flag t)
1371 )
1372 (goto-char (point-max))))))))
1373
1374 ;; undo the dirty deed
1375 (if (not mail-extr-mangle-uucp)
1376 (modify-syntax-entry ?! "." (syntax-table)))
1377 ;;
1378 ;; If we derived the name from the mailbox part of the address,
1379 ;; and we only got one word out of it, don't treat that as a
1380 ;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar")
1381 ;; (if (not mailbox-name-processed-flag)
1382 ;; (delete-region (point-min) (point-max)))
1383 ))
1384
1385 (set-syntax-table mail-extr-address-text-syntax-table)
1386
1387 (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer)
1388 (goto-char (point-min))
1389
1390 ;; If name is "First Last" and userid is "F?L", then assume
1391 ;; the middle initial is the second letter in the userid.
1392 ;; Initial code by Jamie Zawinski <jwz@lucid.com>
1393 ;; *** Make it work when there's a suffix as well.
1394 (goto-char (point-min))
1395 (cond ((and mail-extr-guess-middle-initial
1396 (not disable-initial-guessing-flag)
1397 (eq 3 (- mbox-end mbox-beg))
1398 (progn
1399 (goto-char (point-min))
1400 (looking-at mail-extr-two-name-pattern)))
1401 (setq fi (char-after (match-beginning 0))
1402 li (char-after (match-beginning 3)))
1177 (save-excursion 1403 (save-excursion
1178 (mail-extr-safe-move-sexp -1) 1404 (set-buffer canonicalization-buffer)
1179 (setq domain-pos (point)) 1405 ;; char-equal is ignoring case here, so no need to upcase
1180 (mail-extr-skip-whitespace-backward) 1406 ;; or downcase.
1181 (setq \.-pos (eq ?. (preceding-char)))) 1407 (let ((case-fold-search t))
1182 (cond ((and \.-pos 1408 (and (char-equal fi (char-after mbox-beg))
1183 ;; #### string consing 1409 (char-equal li (char-after (1- mbox-end)))
1184 (let ((s (intern-soft 1410 (setq mi (char-after (1+ mbox-beg))))))
1185 (buffer-substring domain-pos (point)) 1411 (cond ((and mi
1186 mail-extr-all-top-level-domains))) 1412 ;; TODO: use better table than syntax table
1187 (and s (get s 'domain-name)))) 1413 (eq ?w (char-syntax mi)))
1188 (narrow-to-region (point-min) (point)) 1414 (goto-char (match-beginning 3))
1189 (goto-char (car temp)) 1415 (insert (upcase mi) ". ")))))
1190 (mail-extr-delete-char 1) 1416
1191 (setq @-pos (point)) 1417 ;; Nuke name if it is the same as mailbox name.
1192 (setcdr temp nil) 1418 (let ((buffer-length (- (point-max) (point-min)))
1193 (setq %-pos (delq @-pos %-pos)) 1419 (i 0)
1194 (insert "@") 1420 (names-match-flag t))
1195 (throw 'truncated t))) 1421 (cond ((and (> buffer-length 0)
1196 (setq temp (cdr temp)))))) 1422 (eq buffer-length (- mbox-end mbox-beg)))
1197 (setq mbox-beg (point-min) 1423 (goto-char (point-max))
1198 mbox-end (if %-pos (car %-pos) 1424 (insert-buffer-substring canonicalization-buffer
1199 (or @-pos 1425 mbox-beg mbox-end)
1200 (point-max)))) 1426 (while (and names-match-flag
1201 1427 (< i buffer-length))
1202 ;; Done canonicalizing address. 1428 (or (eq (downcase (char-after (+ i (point-min))))
1203 1429 (downcase
1204 (set-buffer extraction-buffer) 1430 (char-after (+ i buffer-length (point-min)))))
1205 1431 (setq names-match-flag nil))
1206 ;; Decide what part of the address to search to find the full name. 1432 (setq i (1+ i)))
1207 (cond ( 1433 (delete-region (+ (point-min) buffer-length) (point-max))
1208 ;; Example: "First M. Last" <fml@foo.bar.dom> 1434 (if names-match-flag
1209 (and phrase-beg 1435 (narrow-to-region (point) (point))))))
1210 (eq quote-beg phrase-beg) 1436
1211 (<= quote-end phrase-end)) 1437 ;; Nuke name if it's just one word.
1212 (narrow-to-region (1+ quote-beg) (1- quote-end)) 1438 (goto-char (point-min))
1213 (mail-extr-undo-backslash-quoting (point-min) (point-max))) 1439 (and mail-extr-ignore-single-names
1214 1440 (not (re-search-forward "[- ]" nil t))
1215 ;; Example: First Last <fml@foo.bar.dom> 1441 (narrow-to-region (point) (point)))
1216 (phrase-beg 1442
1217 (narrow-to-region phrase-beg phrase-end)) 1443 ;; Record the result
1218 1444 (setq value-list
1219 ;; Example: fml@foo.bar.dom (First M. Last) 1445 (cons (list (if (not (= (point-min) (point-max)))
1220 (cbeg 1446 (buffer-string))
1221 (narrow-to-region (1+ cbeg) (1- cend)) 1447 (save-excursion
1222 (mail-extr-undo-backslash-quoting (point-min) (point-max)) 1448 (set-buffer canonicalization-buffer)
1223 1449 (if (not (= (point-min) (point-max)))
1224 ;; Deal with spacing problems 1450 (buffer-string))))
1225 (goto-char (point-min)) 1451 value-list))
1226 ; (cond ((not (search-forward " " nil t)) 1452
1227 ; (goto-char (point-min)) 1453 ;; Unless one address is all we wanted,
1228 ; (cond ((search-forward "_" nil t) 1454 ;; delete this one from extraction-buffer
1229 ; ;; Handle the *idiotic* use of underlines as spaces. 1455 ;; and get ready to extract the next address.
1230 ; ;; Example: fml@foo.bar.dom (First_M._Last) 1456 (when all
1231 ; (goto-char (point-min)) 1457 (if end-of-address
1232 ; (while (search-forward "_" nil t) 1458 (narrow-to-region 1 end-of-address)
1233 ; (replace-match " " t))) 1459 (widen))
1234 ; ((search-forward "." nil t) 1460 (delete-region (point-min) (point-max))
1235 ; ;; Fix . used as space 1461 (widen))
1236 ; ;; Example: danj1@cb.att.com (daniel.jacobson) 1462 )))
1237 ; (goto-char (point-min)) 1463 (if all (nreverse value-list) (car value-list))
1238 ; (while (re-search-forward mail-extr-bad-dot-pattern nil t) 1464 ))
1239 ; (replace-match "\\1 \\2" t))))))
1240 )
1241
1242 ;; Otherwise we try to get the name from the mailbox portion
1243 ;; of the address.
1244 ;; Example: First_M_Last@foo.bar.dom
1245 (t
1246 ;; *** Work in canon buffer instead? No, can't. Hmm.
1247 (goto-char (point-max))
1248 (narrow-to-region (point) (point))
1249 (insert-buffer-substring canonicalization-buffer
1250 mbox-beg mbox-end)
1251 (goto-char (point-min))
1252
1253 ;; Example: First_Last.XXX@foo.bar.dom
1254 (setq \.-ends-name (re-search-forward "[_0-9]" nil t))
1255
1256 (goto-char (point-min))
1257
1258 (if (not mail-extr-mangle-uucp)
1259 (modify-syntax-entry ?! "w" (syntax-table)))
1260
1261 (while (progn
1262 (mail-extr-skip-whitespace-forward)
1263 (not (eobp)))
1264 (setq char (char-after (point)))
1265 (cond
1266 ((eq char ?\")
1267 (setq quote-beg (point))
1268 (or (mail-extr-safe-move-sexp 1)
1269 ;; TODO: handle this error condition!!!!!
1270 (forward-char 1))
1271 ;; take into account deletions
1272 (setq quote-end (- (point) 2))
1273 (save-excursion
1274 (backward-char 1)
1275 (mail-extr-delete-char 1)
1276 (goto-char quote-beg)
1277 (or (eobp)
1278 (mail-extr-delete-char 1)))
1279 (mail-extr-undo-backslash-quoting quote-beg quote-end)
1280 (or (eq ?\ (char-after (point)))
1281 (insert " "))
1282 ;; (setq mailbox-name-processed-flag t)
1283 (setq \.-ends-name t))
1284 ((eq char ?.)
1285 (if (memq (char-after (1+ (point))) '(?_ ?=))
1286 (progn
1287 (forward-char 1)
1288 (mail-extr-delete-char 1)
1289 (insert ?\ ))
1290 (if \.-ends-name
1291 (narrow-to-region (point-min) (point))
1292 (mail-extr-delete-char 1)
1293 (insert " ")))
1294 ;; (setq mailbox-name-processed-flag t)
1295 )
1296 ((memq (char-syntax char) '(?. ?\\))
1297 (mail-extr-delete-char 1)
1298 (insert " ")
1299 ;; (setq mailbox-name-processed-flag t)
1300 )
1301 (t
1302 (setq atom-beg (point))
1303 (forward-word 1)
1304 (setq atom-end (point))
1305 (goto-char atom-beg)
1306 (save-restriction
1307 (narrow-to-region atom-beg atom-end)
1308 (cond
1309
1310 ;; Handle X.400 addresses encoded in RFC-822.
1311 ;; *** Shit! This has to handle the case where it is
1312 ;; *** embedded in a quote too!
1313 ;; *** Shit! The input is being broken up into atoms
1314 ;; *** by periods!
1315 ((looking-at mail-extr-x400-encoded-address-pattern)
1316
1317 ;; Copy the contents of the individual fields that
1318 ;; might hold name data to the beginning.
1319 (mapcar
1320 (function
1321 (lambda (field-pattern)
1322 (cond
1323 ((save-excursion
1324 (re-search-forward field-pattern nil t))
1325 (insert-buffer-substring (current-buffer)
1326 (match-beginning 1)
1327 (match-end 1))
1328 (insert " ")))))
1329 (list mail-extr-x400-encoded-address-given-name-pattern
1330 mail-extr-x400-encoded-address-surname-pattern
1331 mail-extr-x400-encoded-address-full-name-pattern))
1332
1333 ;; Discard the rest, since it contains stuff like
1334 ;; routing information, not part of a name.
1335 (mail-extr-skip-whitespace-backward)
1336 (delete-region (point) (point-max))
1337
1338 ;; Handle periods used for spacing.
1339 (while (re-search-forward mail-extr-bad-dot-pattern nil t)
1340 (replace-match "\\1 \\2" t))
1341
1342 ;; (setq mailbox-name-processed-flag t)
1343 )
1344
1345 ;; Handle normal addresses.
1346 (t
1347 (goto-char (point-min))
1348 ;; Handle _ and = used for spacing.
1349 (while (re-search-forward "\\([^_=]+\\)[_=]" nil t)
1350 (replace-match "\\1 " t)
1351 ;; (setq mailbox-name-processed-flag t)
1352 )
1353 (goto-char (point-max))))))))
1354
1355 ;; undo the dirty deed
1356 (if (not mail-extr-mangle-uucp)
1357 (modify-syntax-entry ?! "." (syntax-table)))
1358 ;;
1359 ;; If we derived the name from the mailbox part of the address,
1360 ;; and we only got one word out of it, don't treat that as a
1361 ;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar")
1362 ;; (if (not mailbox-name-processed-flag)
1363 ;; (delete-region (point-min) (point-max)))
1364 ))
1365
1366 (set-syntax-table mail-extr-address-text-syntax-table)
1367
1368 (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer)
1369 (goto-char (point-min))
1370
1371 ;; If name is "First Last" and userid is "F?L", then assume
1372 ;; the middle initial is the second letter in the userid.
1373 ;; Initial code by Jamie Zawinski <jwz@lucid.com>
1374 ;; *** Make it work when there's a suffix as well.
1375 (goto-char (point-min))
1376 (cond ((and mail-extr-guess-middle-initial
1377 (not disable-initial-guessing-flag)
1378 (eq 3 (- mbox-end mbox-beg))
1379 (progn
1380 (goto-char (point-min))
1381 (looking-at mail-extr-two-name-pattern)))
1382 (setq fi (char-after (match-beginning 0))
1383 li (char-after (match-beginning 3)))
1384 (save-excursion
1385 (set-buffer canonicalization-buffer)
1386 ;; char-equal is ignoring case here, so no need to upcase
1387 ;; or downcase.
1388 (let ((case-fold-search t))
1389 (and (char-equal fi (char-after mbox-beg))
1390 (char-equal li (char-after (1- mbox-end)))
1391 (setq mi (char-after (1+ mbox-beg))))))
1392 (cond ((and mi
1393 ;; TODO: use better table than syntax table
1394 (eq ?w (char-syntax mi)))
1395 (goto-char (match-beginning 3))
1396 (insert (upcase mi) ". ")))))
1397
1398 ;; Nuke name if it is the same as mailbox name.
1399 (let ((buffer-length (- (point-max) (point-min)))
1400 (i 0)
1401 (names-match-flag t))
1402 (cond ((and (> buffer-length 0)
1403 (eq buffer-length (- mbox-end mbox-beg)))
1404 (goto-char (point-max))
1405 (insert-buffer-substring canonicalization-buffer
1406 mbox-beg mbox-end)
1407 (while (and names-match-flag
1408 (< i buffer-length))
1409 (or (eq (downcase (char-after (+ i (point-min))))
1410 (downcase
1411 (char-after (+ i buffer-length (point-min)))))
1412 (setq names-match-flag nil))
1413 (setq i (1+ i)))
1414 (delete-region (+ (point-min) buffer-length) (point-max))
1415 (if names-match-flag
1416 (narrow-to-region (point) (point))))))
1417
1418 ;; Nuke name if it's just one word.
1419 (goto-char (point-min))
1420 (and mail-extr-ignore-single-names
1421 (not (re-search-forward "[- ]" nil t))
1422 (narrow-to-region (point) (point)))
1423
1424 ;; Result
1425 (list (if (not (= (point-min) (point-max)))
1426 (buffer-string))
1427 (progn
1428 (set-buffer canonicalization-buffer)
1429 (if (not (= (point-min) (point-max)))
1430 (buffer-string))))
1431 )))
1432 1465
1433 (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer) 1466 (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer)
1434 (let ((word-count 0) 1467 (let ((word-count 0)
1435 (case-fold-search nil) 1468 (case-fold-search nil)
1436 mixed-case-flag lower-case-flag ;;upper-case-flag 1469 mixed-case-flag lower-case-flag ;;upper-case-flag