Mercurial > emacs
annotate lisp/mail/rfc822.el @ 95697:419c6420a317
*** empty log message ***
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Mon, 09 Jun 2008 07:14:37 +0000 |
parents | ef65fa4dca3b |
children | 8b4f705fb54e |
rev | line source |
---|---|
658
7cbd4fcd8b0f
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
584
diff
changeset
|
1 ;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike |
7cbd4fcd8b0f
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
584
diff
changeset
|
2 |
74509 | 3 ;; Copyright (C) 1986, 1987, 1990, 2001, 2002, 2003, 2004, |
79712 | 4 ;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc. |
845 | 5 |
789
71d052f72ac1
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
658
diff
changeset
|
6 ;; Author: Richard Mlynarik <mly@eddie.mit.edu> |
71d052f72ac1
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
658
diff
changeset
|
7 ;; Maintainer: FSF |
814
38b2499cb3e9
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
804
diff
changeset
|
8 ;; Keywords: mail |
789
71d052f72ac1
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
658
diff
changeset
|
9 |
43 | 10 ;; This file is part of GNU Emacs. |
11 | |
94674
ef65fa4dca3b
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
12 ;; GNU Emacs is free software: you can redistribute it and/or modify |
43 | 13 ;; it under the terms of the GNU General Public License as published by |
94674
ef65fa4dca3b
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
14 ;; the Free Software Foundation, either version 3 of the License, or |
ef65fa4dca3b
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
15 ;; (at your option) any later version. |
43 | 16 |
17 ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 ;; GNU General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
94674
ef65fa4dca3b
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
43 | 24 |
2315
9e7ec92a4fdf
Added or corrected Commentary headers
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
845
diff
changeset
|
25 ;;; Commentary: |
9e7ec92a4fdf
Added or corrected Commentary headers
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
845
diff
changeset
|
26 |
9e7ec92a4fdf
Added or corrected Commentary headers
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
845
diff
changeset
|
27 ;; Support functions for parsing RFC-822 headers, used by mail and news |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
21165
diff
changeset
|
28 ;; modes. |
2315
9e7ec92a4fdf
Added or corrected Commentary headers
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
845
diff
changeset
|
29 |
789
71d052f72ac1
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
658
diff
changeset
|
30 ;;; Code: |
71d052f72ac1
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
658
diff
changeset
|
31 |
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
32 (defvar rfc822-address-start) |
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
33 |
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
34 ;; uses rfc822-address-start free, throws to address |
43 | 35 (defun rfc822-bad-address (reason) |
36 (save-restriction | |
37 (insert "_^_") | |
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
38 (narrow-to-region rfc822-address-start |
43 | 39 (if (re-search-forward "[,;]" nil t) |
40 (max (point-min) (1- (point))) | |
41 (point-max))) | |
42 ;; make the error string be suitable for inclusion in (...) | |
43 (let ((losers '("\\" "(" ")" "\n"))) | |
44 (while losers | |
45 (goto-char (point-min)) | |
46 (while (search-forward (car losers) nil t) | |
47 (backward-char 1) | |
48 (insert ?\\) | |
49 (forward-char 1)) | |
50 (setq losers (cdr losers)))) | |
51 (goto-char (point-min)) (insert "(Unparsable address -- " | |
52 reason | |
15882
874f0cc6c431
(rfc822-bad-address): Don't put a newline into the
Richard M. Stallman <rms@gnu.org>
parents:
14169
diff
changeset
|
53 ": \"") |
43 | 54 (goto-char (point-max)) (insert "\")")) |
55 (rfc822-nuke-whitespace) | |
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
56 (throw 'address (buffer-substring rfc822-address-start (point)))) |
43 | 57 |
58 (defun rfc822-nuke-whitespace (&optional leave-space) | |
59 (let (ch) | |
60 (while (cond ((eobp) | |
61 nil) | |
62 ((= (setq ch (following-char)) ?\() | |
63 (forward-char 1) | |
64 (while (if (eobp) | |
65 (rfc822-bad-address "Unbalanced comment (...)") | |
66 (/= (setq ch (following-char)) ?\))) | |
67 (cond ((looking-at "[^()\\]+") | |
68 (replace-match "")) | |
69 ((= ch ?\() | |
70 (rfc822-nuke-whitespace)) | |
71 ((< (point) (1- (point-max))) | |
72 (delete-char 2)) | |
73 (t | |
74 (rfc822-bad-address "orphaned backslash")))) | |
75 ;; delete remaining "()" | |
76 (forward-char -1) | |
77 (delete-char 2) | |
78 t) | |
49846
7e279209f48b
(rfc822-nuke-whitespace): Fix character constant.
Juanma Barranquero <lekktu@gmail.com>
parents:
49598
diff
changeset
|
79 ((memq ch '(?\ ?\t ?\n)) |
43 | 80 (delete-region (point) |
81 (progn (skip-chars-forward " \t\n") (point))) | |
82 t) | |
83 (t | |
84 nil))) | |
85 (or (not leave-space) | |
86 (eobp) | |
87 (bobp) | |
88 (= (preceding-char) ?\ ) | |
89 (insert ?\ )))) | |
90 | |
91 (defun rfc822-looking-at (regex &optional leave-space) | |
92 (if (cond ((stringp regex) | |
93 (if (looking-at regex) | |
94 (progn (goto-char (match-end 0)) | |
95 t))) | |
96 (t | |
97 (if (and (not (eobp)) | |
98 (= (following-char) regex)) | |
99 (progn (forward-char 1) | |
100 t)))) | |
101 (let ((tem (match-data))) | |
102 (rfc822-nuke-whitespace leave-space) | |
21165
480d7479ccac
(rfc822-looking-at): store-match-data => set-match-data.
Richard M. Stallman <rms@gnu.org>
parents:
20683
diff
changeset
|
103 (set-match-data tem) |
43 | 104 t))) |
105 | |
106 (defun rfc822-snarf-word () | |
107 ;; word is atom | quoted-string | |
108 (cond ((= (following-char) ?\") | |
109 ;; quoted-string | |
110 (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"") | |
111 (rfc822-bad-address "Unterminated quoted string"))) | |
20683
5623c78a31e5
(rfc822-snarf-word): Don't reject non-ASCII chars.
Richard M. Stallman <rms@gnu.org>
parents:
15882
diff
changeset
|
112 ((rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+") |
43 | 113 ;; atom |
114 ) | |
115 (t | |
116 (rfc822-bad-address "Rubbish in address")))) | |
117 | |
118 (defun rfc822-snarf-words () | |
119 (rfc822-snarf-word) | |
120 (while (rfc822-looking-at ?.) | |
121 (rfc822-snarf-word))) | |
122 | |
123 (defun rfc822-snarf-subdomain () | |
124 ;; sub-domain is domain-ref | domain-literal | |
125 (cond ((= (following-char) ?\[) | |
126 ;; domain-ref | |
127 (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]") | |
128 (rfc822-bad-address "Unterminated domain literal [...]"))) | |
20683
5623c78a31e5
(rfc822-snarf-word): Don't reject non-ASCII chars.
Richard M. Stallman <rms@gnu.org>
parents:
15882
diff
changeset
|
129 ((rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+") |
43 | 130 ;; domain-literal = atom |
131 ) | |
132 (t | |
133 (rfc822-bad-address "Rubbish in host/domain specification")))) | |
134 | |
135 (defun rfc822-snarf-domain () | |
136 (rfc822-snarf-subdomain) | |
137 (while (rfc822-looking-at ?.) | |
138 (rfc822-snarf-subdomain))) | |
139 | |
140 (defun rfc822-snarf-frob-list (name separator terminator snarfer | |
141 &optional return) | |
142 (let ((first t) | |
143 (list ()) | |
144 tem) | |
145 (while (cond ((eobp) | |
146 (rfc822-bad-address | |
147 (format "End of addresses in middle of %s" name))) | |
148 ((rfc822-looking-at terminator) | |
149 nil) | |
150 ((rfc822-looking-at separator) | |
151 ;; multiple separators are allowed and do nothing. | |
152 (while (rfc822-looking-at separator)) | |
153 t) | |
154 (first | |
155 t) | |
156 (t | |
157 (rfc822-bad-address | |
158 (format "Gubbish in middle of %s" name)))) | |
159 (setq tem (funcall snarfer) | |
160 first nil) | |
161 (and return tem | |
162 (setq list (if (listp tem) | |
163 (nconc (reverse tem) list) | |
164 (cons tem list))))) | |
165 (nreverse list))) | |
166 | |
167 ;; return either an address (a string) or a list of addresses | |
168 (defun rfc822-addresses-1 (&optional allow-groups) | |
169 ;; Looking for an rfc822 `address' | |
170 ;; Either a group (1*word ":" [#mailbox] ";") | |
171 ;; or a mailbox (addr-spec | 1*word route-addr) | |
172 ;; addr-spec is (local-part "@" domain) | |
173 ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">") | |
174 ;; local-part is (word *("." word)) | |
175 ;; word is (atom | quoted-string) | |
176 ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)") | |
177 ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+ | |
178 ;; domain is sub-domain *("." sub-domain) | |
179 ;; sub-domain is domain-ref | domain-literal | |
180 ;; domain-literal is "[" *(dtext | quoted-pair) "]" | |
181 ;; dtext is "[^][\\n" | |
182 ;; domain-ref is atom | |
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
183 (let ((rfc822-address-start (point)) |
43 | 184 (n 0)) |
185 (catch 'address | |
186 ;; optimize common cases: | |
187 ;; foo | |
188 ;; foo.bar@bar.zap | |
189 ;; followed by "\\'\\|,\\|([^()\\]*)\\'" | |
190 ;; other common cases are: | |
191 ;; foo bar <foo.bar@baz.zap> | |
192 ;; "foo bar" <foo.bar@baz.zap> | |
193 ;; those aren't hacked yet. | |
20683
5623c78a31e5
(rfc822-snarf-word): Don't reject non-ASCII chars.
Richard M. Stallman <rms@gnu.org>
parents:
15882
diff
changeset
|
194 (if (and (rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037 ()<>@,;:\\\"]+\\)" t) |
43 | 195 (progn (or (eobp) |
196 (rfc822-looking-at ?,)))) | |
197 (progn | |
198 ;; rfc822-looking-at may have inserted a space | |
199 (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1)) | |
200 ;; relying on the fact that rfc822-looking-at <char> | |
201 ;; doesn't mung match-data | |
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
202 (throw 'address (buffer-substring rfc822-address-start (match-end 0))))) |
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
203 (goto-char rfc822-address-start) |
43 | 204 (while t |
205 (cond ((and (= n 1) (rfc822-looking-at ?@)) | |
206 ;; local-part@domain | |
207 (rfc822-snarf-domain) | |
208 (throw 'address | |
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
209 (buffer-substring rfc822-address-start (point)))) |
43 | 210 ((rfc822-looking-at ?:) |
211 (cond ((not allow-groups) | |
212 (rfc822-bad-address "A group name may not appear here")) | |
213 ((= n 0) | |
214 (rfc822-bad-address "No name for :...; group"))) | |
215 ;; group | |
216 (throw 'address | |
217 ;; return a list of addresses | |
218 (rfc822-snarf-frob-list ":...; group" ?\, ?\; | |
219 'rfc822-addresses-1 t))) | |
220 ((rfc822-looking-at ?<) | |
221 (let ((start (point)) | |
222 (strip t)) | |
223 (cond ((rfc822-looking-at ?>) | |
224 ;; empty path | |
225 ()) | |
226 ((and (not (eobp)) (= (following-char) ?\@)) | |
227 ;; <@foo.bar,@baz:quux@abcd.efg> | |
228 (rfc822-snarf-frob-list "<...> address" ?\, ?\: | |
229 (function (lambda () | |
230 (if (rfc822-looking-at ?\@) | |
231 (rfc822-snarf-domain) | |
232 (rfc822-bad-address | |
233 "Gubbish in route-addr"))))) | |
234 (rfc822-snarf-words) | |
235 (or (rfc822-looking-at ?@) | |
236 (rfc822-bad-address "Malformed <..@..> address")) | |
237 (rfc822-snarf-domain) | |
238 (setq strip nil)) | |
239 ((progn (rfc822-snarf-words) (rfc822-looking-at ?@)) | |
240 ; allow <foo> (losing unix seems to do this) | |
241 (rfc822-snarf-domain))) | |
242 (let ((end (point))) | |
243 (if (rfc822-looking-at ?\>) | |
244 (throw 'address | |
245 (buffer-substring (if strip start (1- start)) | |
246 (if strip end (1+ end)))) | |
247 (rfc822-bad-address "Unterminated <...> address"))))) | |
20683
5623c78a31e5
(rfc822-snarf-word): Don't reject non-ASCII chars.
Richard M. Stallman <rms@gnu.org>
parents:
15882
diff
changeset
|
248 ((looking-at "[^][\000-\037 ()<>@,;:\\.]") |
43 | 249 ;; this allows "." to be part of the words preceding |
250 ;; an addr-spec, since many broken mailers output | |
251 ;; "Hern K. Herklemeyer III | |
252 ;; <yank@megadeath.dod.gods-own-country>" | |
125 | 253 (let ((again t)) |
254 (while again | |
255 (or (= n 0) (bobp) (= (preceding-char) ?\ ) | |
256 (insert ?\ )) | |
126 | 257 (rfc822-snarf-words) |
125 | 258 (setq n (1+ n)) |
259 (setq again (or (rfc822-looking-at ?.) | |
20683
5623c78a31e5
(rfc822-snarf-word): Don't reject non-ASCII chars.
Richard M. Stallman <rms@gnu.org>
parents:
15882
diff
changeset
|
260 (looking-at "[^][\000-\037 ()<>@,;:\\.]")))))) |
43 | 261 ((= n 0) |
262 (throw 'address nil)) | |
263 ((= n 1) ; allow "foo" (losing unix seems to do this) | |
264 (throw 'address | |
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
265 (buffer-substring rfc822-address-start (point)))) |
125 | 266 ((> n 1) |
267 (rfc822-bad-address "Missing comma between addresses or badly-formatted address")) | |
268 ((or (eobp) (= (following-char) ?,)) | |
43 | 269 (rfc822-bad-address "Missing comma or route-spec")) |
270 (t | |
271 (rfc822-bad-address "Strange character or missing comma"))))))) | |
272 | |
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
21165
diff
changeset
|
273 |
43 | 274 (defun rfc822-addresses (header-text) |
20683
5623c78a31e5
(rfc822-snarf-word): Don't reject non-ASCII chars.
Richard M. Stallman <rms@gnu.org>
parents:
15882
diff
changeset
|
275 (if (string-match "\\`[ \t]*\\([^][\000-\037 ()<>@,;:\\\".]+\\)[ \t]*\\'" |
43 | 276 header-text) |
277 ;; Make very simple case moderately fast. | |
278 (list (substring header-text (match-beginning 1) (match-end 1))) | |
279 (let ((buf (generate-new-buffer " rfc822"))) | |
280 (unwind-protect | |
281 (save-excursion | |
282 (set-buffer buf) | |
283 (make-local-variable 'case-fold-search) | |
284 (setq case-fold-search nil) ;For speed(?) | |
285 (insert header-text) | |
286 ;; unfold continuation lines | |
287 (goto-char (point-min)) | |
288 | |
289 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) | |
290 (replace-match "\\1 " t)) | |
291 | |
292 (goto-char (point-min)) | |
293 (let ((list ()) | |
294 tem | |
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
295 rfc822-address-start); this is for rfc822-bad-address |
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
296 (rfc822-nuke-whitespace) |
43 | 297 (while (not (eobp)) |
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
298 (setq rfc822-address-start (point)) |
43 | 299 (setq tem |
300 (catch 'address ; this is for rfc822-bad-address | |
301 (cond ((rfc822-looking-at ?\,) | |
302 nil) | |
20683
5623c78a31e5
(rfc822-snarf-word): Don't reject non-ASCII chars.
Richard M. Stallman <rms@gnu.org>
parents:
15882
diff
changeset
|
303 ((looking-at "[][\000-\037@;:\\.>)]") |
43 | 304 (forward-char) |
305 (rfc822-bad-address | |
306 (format "Strange character \\%c found" | |
307 (preceding-char)))) | |
308 (t | |
309 (rfc822-addresses-1 t))))) | |
310 (cond ((null tem)) | |
311 ((stringp tem) | |
312 (setq list (cons tem list))) | |
313 (t | |
314 (setq list (nconc (nreverse tem) list))))) | |
315 (nreverse list))) | |
316 (and buf (kill-buffer buf)))))) | |
317 | |
584 | 318 (provide 'rfc822) |
319 | |
93975
1e3a407766b9
Fix up comment convention on the arch-tag lines.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
79712
diff
changeset
|
320 ;; arch-tag: 5d388a24-e173-40fb-9b8e-85269de44b37 |
658
7cbd4fcd8b0f
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
584
diff
changeset
|
321 ;;; rfc822.el ends here |