Mercurial > emacs
comparison lisp/mh-e/mh-junk.el @ 50702:7dd3d5eae9c7
Upgraded to MH-E version 7.3.
See etc/MH-E-NEWS and lisp/mh-e/ChangeLog for details.
author | Bill Wohler <wohler@newt.com> |
---|---|
date | Fri, 25 Apr 2003 05:52:00 +0000 |
parents | |
children | 695cf19ef79e |
comparison
equal
deleted
inserted
replaced
50701:cb5f0a5d5b36 | 50702:7dd3d5eae9c7 |
---|---|
1 ;;; mh-junk.el --- Interface to anti-spam measures | |
2 | |
3 ;; Copyright (C) 2003 Free Software Foundation, Inc. | |
4 | |
5 ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>, | |
6 ;; Bill Wohler <wohler@newt.com> | |
7 ;; Maintainer: Bill Wohler <wohler@newt.com> | |
8 ;; Keywords: mail, spam | |
9 | |
10 ;; This file is part of GNU Emacs. | |
11 | |
12 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
13 ;; it under the terms of the GNU General Public License as published by | |
14 ;; the Free Software Foundation; either version 2, or (at your option) | |
15 ;; any later version. | |
16 | |
17 ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 ;; GNU General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
25 ;; Boston, MA 02111-1307, USA. | |
26 | |
27 ;;; Commentary: | |
28 | |
29 ;; Spam handling in MH-E. | |
30 | |
31 ;;; Change Log: | |
32 | |
33 ;;; Code: | |
34 | |
35 (require 'mh-e) | |
36 | |
37 ;; Interactive functions callable from the folder buffer | |
38 ;;;###mh-autoload | |
(defun mh-junk-blacklist (msg-or-seq)
  "Blacklist MSG-OR-SEQ as spam.
Default is the displayed message.
If optional prefix argument is provided, then prompt for the message sequence.
If variable `transient-mark-mode' is non-nil and the mark is active, then the
selected region is blacklisted.
In a program, MSG-OR-SEQ can be a message number, a list of message numbers, a
region in a cons cell, or a sequence.

Each message is first handed to the blacklisting function that
`mh-junk-function-alist' associates with `mh-junk-choice'. Afterwards the
message is refiled to `mh-junk-mail-folder' if that is a string, or deleted if
it is nil.

To change the spam program being used, customize `mh-junk-program'. Directly
setting `mh-junk-choice' is not recommended.

The documentation for the following functions describes what setup is needed
for the different spam fighting programs:

  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'
  - `mh-spamassassin-blacklist'"
  (interactive (list (mh-interactive-msg-or-seq "Blacklist")))
  (let ((blacklister (nth 1 (assoc mh-junk-choice mh-junk-function-alist)))
        junk-folder)
    (or blacklister
        (error "Customize `mh-junk-program' appropriately"))
    ;; Normalize `mh-junk-mail-folder' into a folder name: an empty string
    ;; becomes "+", a leading "+" is kept as is, a leading "@" is replaced by
    ;; the current folder plus "/", and anything else gets "+" prepended.
    (setq junk-folder
          (when mh-junk-mail-folder
            (if (equal mh-junk-mail-folder "")
                "+"
              (let ((first-char (aref mh-junk-mail-folder 0)))
                (cond ((eq first-char ?+)
                       mh-junk-mail-folder)
                      ((eq first-char ?@)
                       (concat mh-current-folder "/"
                               (substring mh-junk-mail-folder 1)))
                      (t
                       (concat "+" mh-junk-mail-folder)))))))
    (mh-iterate-on-msg-or-seq msg msg-or-seq
      (funcall (symbol-function blacklister) msg)
      (cond (junk-folder
             (mh-refile-a-msg nil (intern junk-folder)))
            (t
             (mh-delete-a-msg nil))))
    (mh-next-msg)))
79 | |
80 ;;;###mh-autoload | |
(defun mh-junk-whitelist (msg-or-seq)
  "Whitelist MSG-OR-SEQ incorrectly classified as spam.
Default is the displayed message.
If optional prefix argument is provided, then prompt for the message sequence.
If variable `transient-mark-mode' is non-nil and the mark is active, then the
selected region is whitelisted.
In a program, MSG-OR-SEQ can be a message number, a list of message numbers, a
region in a cons cell, or a sequence.

Each message is first handed to the whitelisting function that
`mh-junk-function-alist' associates with `mh-junk-choice', and is then refiled
to `mh-inbox'.

To change the spam program being used, customize `mh-junk-program'. Directly
setting `mh-junk-choice' is not recommended."
  (interactive (list (mh-interactive-msg-or-seq "Whitelist")))
  (let ((whitelister (nth 2 (assoc mh-junk-choice mh-junk-function-alist))))
    (or whitelister
        (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-msg-or-seq msg msg-or-seq
      (funcall (symbol-function whitelister) msg)
      (mh-refile-a-msg nil (intern mh-inbox)))
    (mh-next-msg)))
103 | |
104 | |
105 | |
106 ;; Bogofilter Interface | |
107 | |
(defvar mh-bogofilter-executable (executable-find "bogofilter")
  "File name of the bogofilter program, looked up when this file is loaded.
This is nil if `executable-find' could not locate \"bogofilter\", in which
case the bogofilter interface functions signal an error.")
109 | |
(defun mh-bogofilter-blacklist (msg)
  "Classify MSG as spam.
Tell bogofilter that the message is spam.

Bogofilter is a Bayesian spam filtering program. Get it from your local
distribution or from:
   http://bogofilter.sourceforge.net/

You first need to teach bogofilter. This is done by running

   bogofilter -n < good-message

on every good message, and

   bogofilter -s < spam-message

on every spam message. Most Bayesian filters need 1000 to 5000 of each to
start doing a good job.

To use bogofilter, add the following .procmailrc recipes which you can also
find in the bogofilter man page:

   # Bogofilter
   :0fw
   | bogofilter -u -e -p

   :0
   * ^X-Bogosity: Yes, tests=bogofilter
   $SPAM

Bogofilter continues to feed the messages it classifies back into its
database. Occasionally it misses, and those messages need to be reclassified.
MH-E can do this for you. Use \\[mh-junk-blacklist] to reclassify messages in
your +inbox as spam, and \\[mh-junk-whitelist] to reclassify messages in your
spambox as good messages."
  (or mh-bogofilter-executable
      (error "Couldn't find the bogofilter executable"))
  ;; The message file is bogofilter's standard input.  A destination of 0
  ;; makes `call-process' discard the output and return without waiting.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "-Ns"))
149 | |
(defun mh-bogofilter-whitelist (msg)
  "Reinstate incorrectly filtered MSG.
Run bogofilter on the message file so that it learns to treat the message as
non-spam."
  (or mh-bogofilter-executable
      (error "Couldn't find the bogofilter executable"))
  ;; The message file is bogofilter's standard input.  A destination of 0
  ;; makes `call-process' discard the output and return without waiting.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "-Sn"))
157 | |
158 | |
159 | |
160 ;; Spamprobe Interface | |
161 | |
(defvar mh-spamprobe-executable (executable-find "spamprobe")
  "File name of the spamprobe program, looked up when this file is loaded.
This is nil if `executable-find' could not locate \"spamprobe\", in which
case the spamprobe interface functions signal an error.")
163 | |
(defun mh-spamprobe-blacklist (msg)
  "Classify MSG as spam.
Tell spamprobe that the message is spam.

Spamprobe is a Bayesian spam filtering program. More info about the program can
be found at:
   http://spamprobe.sourceforge.net

Here is a procmail recipe that stores incoming spam mail into the folder +spam
and good mail in /home/user/Mail/mdrop/mbox. This recipe is provided as an
example in the spamprobe man page.

   PATH=/bin:/usr/bin:/usr/local/bin
   DEFAULT=/home/user/Mail/mdrop/mbox
   SPAM=/home/user/Mail/spam/.

   # Spamprobe filtering
   :0
   SCORE=| spamprobe receive
   :0 wf
   | formail -I \"X-SpamProbe: $SCORE\"
   :0 a:
   *^X-SpamProbe: SPAM
   $SPAM

Occasionally some good mail gets misclassified as spam. You can use
\\[mh-junk-whitelist] to reclassify that as good mail."
  (or mh-spamprobe-executable
      (error "Couldn't find the spamprobe executable"))
  ;; The message file is spamprobe's standard input.  A destination of 0
  ;; makes `call-process' discard the output and return without waiting.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "spam"))
195 | |
(defun mh-spamprobe-whitelist (msg)
  "Reinstate incorrectly filtered MSG.
Run spamprobe on the message file so that it learns to treat the message as
non-spam."
  (or mh-spamprobe-executable
      (error "Couldn't find the spamprobe executable"))
  ;; The message file is spamprobe's standard input.  A destination of 0
  ;; makes `call-process' discard the output and return without waiting.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "good"))
203 | |
204 | |
205 | |
206 ;; Spamassassin Interface | |
207 | |
(defvar mh-spamassassin-executable (executable-find "spamassassin")
  "File name of the spamassassin program, looked up when this file is loaded.
This is nil if `executable-find' could not locate \"spamassassin\", in which
case the spamassassin interface functions signal an error.")

(defvar mh-sa-learn-executable (executable-find "sa-learn")
  "File name of the sa-learn program, looked up when this file is loaded.
This is nil if `executable-find' could not locate \"sa-learn\". The program is
optional: when it is missing, the spamassassin interface functions simply skip
the recategorization step.")
210 | |
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG.
This is done by sending the message to Razor and by appending the sender to
~/.spamassassin/user_prefs in a blacklist_from rule. If sa-learn is available,
the message is also recategorized as spam.

Spamassassin is an excellent spam filter. For more information, see:
  http://spamassassin.org/.

I ran \"spamassassin -t\" on every mail message in my archive and ran an
analysis in Gnumeric to find that the standard deviation of good mail
scored under 5 (coincidentally, the spamassassin default for \"spam\").

Furthermore, I observed that there weren't any messages with a score of 8
or more that were interesting, so I added a couple of points to be
conservative and send any message with a score of 10 or more down the
drain. You might want to use a score of 12 or 13 to be really conservative.
I have found that this really decreases the amount of junk to review.

Messages with a score of 5-9 are set aside for later review. The major
weakness of rules-based filters is a plethora of false positives\; I catch one
or two legitimate messages in here a week, so it is worthwhile to check.

You might choose to do this analysis yourself to pick a good score for
deleting spam sight unseen, or you might pick a score out of a hat, or you
might choose to be very conservative and not delete any messages at all.

Based upon this discussion, here is what the associated ~/.procmailrc
entries look like. These rules appear before my list filters so that spam
sent to mailing lists gets pruned too.

   #
   # Spam
   #
   :0fw
   | spamc

   # Anything with a spam level of 10 or more is junked immediately.
   :0:
   * ^X-Spam-Level: ..........
   /dev/null

   :0
   * ^X-Spam-Status: Yes
   $SPAM

If you don't use \"spamc\", use \"spamassassin -P -a\".

A handful of spam does find its way into +inbox. In this case, use
\\[mh-junk-blacklist] to add a \"blacklist_from\" line to
~/.spamassassin/user_prefs, delete the message, and send the message to the
Razor, so that others might not see this spam.

Over time, you see some patterns in the blacklisted addresses and can
replace several lines with wildcards. For example, it is clear that High
Speed Media is the biggest bunch of jerks on the Net. Here are some of the
entries I have for them, and the list continues to grow.

   blacklist_from *@*-hsm-*.com
   blacklist_from *@*182*643*.com
   blacklist_from *@*antarhsm*.com
   blacklist_from *@*h*speed*
   blacklist_from *@*hsm*182*.com
   blacklist_from *@*hsm*643*.com
   blacklist_from *@*hsmridi2983cslt227.com
   blacklist_from *@*list*hsm*.com
   blacklist_from *@h*s*media*
   blacklist_from *@hsmdrct.com
   blacklist_from *@hsmridi2983csltsite.com

The function `mh-spamassassin-identify-spammers' is provided that shows the
frequency counts of the host and domain names in your blacklist_from
entries. This can be helpful when editing the blacklist_from entries.

In versions of spamassassin (2.50 and on) that support a Bayesian classifier,
\\[mh-junk-blacklist] uses the sa-learn program to recategorize the message as
spam. Neither MH-E, nor spamassassin, rebuilds the database after adding
words, so you will need to run \"sa-learn --rebuild\" periodically. This can
be done by adding the following to your crontab:

   0 * * * * sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Couldn't find the spamassassin executable"))
  ;; The folder name is captured before the current buffer is switched to the
  ;; temporary buffer below.  NOTE(review): presumably `mh-current-folder' is
  ;; buffer-local to the folder buffer -- confirm.
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder)))
    (save-excursion
      (message "Giving this message the Razor...")
      (mh-truncate-log-buffer)
      (call-process mh-spamassassin-executable msg-file mh-log-buffer nil
                    "--report" "--remove-from-whitelist")
      (when mh-sa-learn-executable
        (message "Recategorizing this message as spam...")
        ;; `call-process' does not go through a shell, so every option has to
        ;; be a separate argument.  Passing "--local --no-rebuild" as one
        ;; string would hand sa-learn a single, unrecognized option.
        (call-process mh-sa-learn-executable msg-file mh-log-buffer nil
                      "--single" "--spam" "--local" "--no-rebuild"))
      (message "Blacklisting address...")
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      ;; Run scan on MSG with a format that is meant to print the sender's
      ;; address, or nothing at all when the message is from the user's own
      ;; mailbox.
      (call-process (expand-file-name mh-scan-prog mh-progs) nil t nil
                    (format "%s" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      ;; The first non-empty output line, if any, is the address to blacklist.
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (progn
            (mh-spamassassin-add-rule "blacklist_from" (match-string 0))
            (message "Blacklisting address...done"))
        (message "Blacklisting address...not done (from my address)")))))
319 | |
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG.
The message file is rewritten with the spamassassin markup removed, and a
whitelist_from rule for the sender is added to the ~/.spamassassin/user_prefs
file. If sa-learn is available, then the message is recategorized as ham.
No rule is added when no address can be extracted from the From: header."
  (unless mh-spamassassin-executable
    (error "Couldn't find the spamassassin executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (save-excursion
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      (message "Removing spamassassin markup from message...")
      (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil
                    "--remove-markup")
      ;; NOTE(review): the show buffer is presumably killed because the
      ;; message file is about to be rewritten underneath it -- confirm.
      (if show-buffer
          (kill-buffer show-buffer))
      ;; Replace the message file with the cleaned copy held in this buffer.
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing this message as ham...")
        ;; `call-process' does not go through a shell, so every option has to
        ;; be a separate argument.  Passing "--local --no-rebuild" as one
        ;; string would hand sa-learn a single, unrecognized option.
        (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil
                      "--single" "--ham" "--local" "--no-rebuild"))
      (message "Whitelisting address...")
      (setq from (car (ietf-drums-parse-address (mh-get-header-field "From:"))))
      (kill-buffer nil)
      ;; FROM is nil, not "", when the header yields no address (the `car' of
      ;; a nil parse result).  Check for both so that a bogus
      ;; "whitelist_from nil" rule is never written.
      (unless (or (null from) (equal from ""))
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting address...done"))))
348 | |
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to ~/.spamassassin/user_prefs.
The name of the rule is RULE and its body is BODY.

The rule is written as RULE, a tab and BODY on a line of its own at the end of
the file, and the file is saved. Nothing is added if the same line is already
present anywhere in the file (the comparison ignores case). If the file was
not being visited before the call, its buffer is killed afterwards."
  (save-window-excursion
    (let* ((entry (format "%s\t%s" rule body))
           (case-fold-search t)
           (file (expand-file-name "~/.spamassassin/user_prefs"))
           (buffer-exists (find-buffer-visiting file)))
      (find-file file)
      (save-excursion
        ;; Look for an existing copy from the very start of the buffer.  The
        ;; buffer may already have been visited with point anywhere, and the
        ;; rule may sit on the first line or on a last line that lacks a
        ;; trailing newline, so anchor on line boundaries rather than on a
        ;; surrounding pair of newlines.
        (goto-char (point-min))
        (unless (re-search-forward (concat "^" (regexp-quote entry) "$") nil t)
          (goto-char (point-max))
          (insert (if (bolp) "" "\n") entry "\n")
          (save-buffer)))
      (if (not buffer-exists)
          (kill-buffer nil)))))
365 | |
(defun mh-spamassassin-identify-spammers ()
  "Identify spammers who are repeat offenders.

For each blacklist_from entry from the last blank line of
~/.spamassassin/user_prefs to the end of the file, a list of host and domain
names along with their frequency counts is displayed. If the file contains no
blank line, every entry in the file is considered. Only names that occur more
than twice are listed, most frequent first, in the buffer
\"*MH-E Spammer Frequencies*\". This information can be used to replace
multiple blacklist_from entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com"
  (interactive)
  (let* ((file (expand-file-name "~/.spamassassin/user_prefs"))
         (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.  When
    ;; there is no blank line, fall back to the start of the file instead of
    ;; signaling a search error.
    (goto-char (1- (point-max)))
    (unless (search-backward-regexp "^$" nil t)
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        ;; Split the part after the \"@\" on dots, drop the last component
        ;; (the top-level domain), and count each remaining component.
        (dolist (part (cdr (reverse (split-string (match-string 2) "\\."))))
          (puthash part (1+ (gethash part domains 0)) domains))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash (lambda (key value)
               (if (> value 2)
                   (insert (format "%s %s\n" key value))))
             domains)
    ;; Sort ascending on the count column, then flip for descending order.
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
408 | |
409 (provide 'mh-junk) | |
410 | |
411 ;;; Local Variables: | |
412 ;;; indent-tabs-mode: nil | |
413 ;;; sentence-end-double-space: nil | |
414 ;;; End: | |
415 | |
416 ;;; mh-junk.el ends here |