Mercurial > emacs
annotate lisp/mh-e/mh-junk.el @ 56433:a7600d0e7fdc
(cua--preserve-mark-commands): New defvar.
Init to beginning-of-buffer and end-of-buffer.
(cua--undo-push-mark): New defvar.
(cua--pre-command-handler): Set inhibit-mark-movement if mark is
already active and command is in cua--preserve-mark-commands.
Also fix check for shift modifier on non-window systems.
(cua--post-command-handler): Clear inhibit-mark-movement if set.
author | Kim F. Storm <storm@cua.dk> |
---|---|
date | Fri, 16 Jul 2004 10:42:26 +0000 |
parents | d36b00b98db0 |
children | e9a6cbc8ca5e 97905c4f1a42 |
rev | line source |
---|---|
50702 | 1 ;;; mh-junk.el --- Interface to anti-spam measures |
2 | |
3 ;; Copyright (C) 2003 Free Software Foundation, Inc. | |
4 | |
5 ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>, | |
6 ;; Bill Wohler <wohler@newt.com> | |
7 ;; Maintainer: Bill Wohler <wohler@newt.com> | |
8 ;; Keywords: mail, spam | |
9 | |
10 ;; This file is part of GNU Emacs. | |
11 | |
12 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
13 ;; it under the terms of the GNU General Public License as published by | |
14 ;; the Free Software Foundation; either version 2, or (at your option) | |
15 ;; any later version. | |
16 | |
17 ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 ;; GNU General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
25 ;; Boston, MA 02111-1307, USA. | |
26 | |
27 ;;; Commentary: | |
28 | |
29 ;; Spam handling in MH-E. | |
30 | |
31 ;;; Change Log: | |
32 | |
33 ;;; Code: | |
34 | |
35 (require 'mh-e) | |
36 | |
37 ;; Interactive functions callable from the folder buffer | |
38 ;;;###mh-autoload | |
56406
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
39 (defun mh-junk-blacklist (range) |
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
40 "Blacklist RANGE as spam. |
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
41 |
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
42 Check the documentation of `mh-interactive-range' to see how RANGE is read in |
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
43 interactive use. |
50702 | 44 |
45 First the appropriate function is called depending on the value of | |
46 `mh-junk-choice'. Then if `mh-junk-mail-folder' is a string then the message is | |
47 refiled to that folder. If nil, the message is deleted. | |
48 | |
49 To change the spam program being used, customize `mh-junk-program'. Directly | |
50 setting `mh-junk-choice' is not recommended. | |
51 | |
52 The documentation for the following functions describes what setup is needed | |
53 for the different spam fighting programs: | |
54 | |
55 - `mh-bogofilter-blacklist' | |
56 - `mh-spamprobe-blacklist' | |
57 - `mh-spamassassin-blacklist'" | |
56406
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
58 (interactive (list (mh-interactive-range "Blacklist"))) |
50702 | 59 (let ((blacklist-func (nth 1 (assoc mh-junk-choice mh-junk-function-alist)))) |
60 (unless blacklist-func | |
61 (error "Customize `mh-junk-program' appropriately")) | |
62 (let ((dest (cond ((null mh-junk-mail-folder) nil) | |
63 ((equal mh-junk-mail-folder "") "+") | |
64 ((eq (aref mh-junk-mail-folder 0) ?+) | |
65 mh-junk-mail-folder) | |
66 ((eq (aref mh-junk-mail-folder 0) ?@) | |
67 (concat mh-current-folder "/" | |
68 (substring mh-junk-mail-folder 1))) | |
69 (t (concat "+" mh-junk-mail-folder))))) | |
56406
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
70 (mh-iterate-on-range msg range |
50702 | 71 (funcall (symbol-function blacklist-func) msg) |
72 (if dest | |
73 (mh-refile-a-msg nil (intern dest)) | |
74 (mh-delete-a-msg nil))) | |
75 (mh-next-msg)))) | |
76 | |
77 ;;;###mh-autoload | |
56406
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
78 (defun mh-junk-whitelist (range) |
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
79 "Whitelist RANGE incorrectly classified as spam. |
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
80 |
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
81 Check the documentation of `mh-interactive-range' to see how RANGE is read in |
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
82 interactive use. |
50702 | 83 |
84 First the appropriate function is called depending on the value of | |
85 `mh-junk-choice'. Then the message is refiled to `mh-inbox'. | |
86 | |
87 To change the spam program being used, customize `mh-junk-program'. Directly | |
88 setting `mh-junk-choice' is not recommended." | |
56406
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
89 (interactive (list (mh-interactive-range "Whitelist"))) |
50702 | 90 (let ((whitelist-func (nth 2 (assoc mh-junk-choice mh-junk-function-alist)))) |
91 (unless whitelist-func | |
92 (error "Customize `mh-junk-program' appropriately")) | |
56406
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
93 (mh-iterate-on-range msg range |
50702 | 94 (funcall (symbol-function whitelist-func) msg) |
95 (mh-refile-a-msg nil (intern mh-inbox))) | |
96 (mh-next-msg))) | |
97 | |
98 | |
99 | |
100 ;; Bogofilter Interface | |
101 | |
102 (defvar mh-bogofilter-executable (executable-find "bogofilter")) | |
103 | |
104 (defun mh-bogofilter-blacklist (msg) | |
105 "Classify MSG as spam. | |
106 Tell bogofilter that the message is spam. | |
107 | |
108 Bogofilter is a Bayesian spam filtering program. Get it from your local | |
109 distribution or from: | |
110 http://bogofilter.sourceforge.net/ | |
111 | |
112 You first need to teach bogofilter. This is done by running | |
113 | |
114 bogofilter -n < good-message | |
115 | |
116 on every good message, and | |
117 | |
118 bogofilter -s < spam-message | |
119 | |
120 on every spam message. Most Bayesian filters need 1000 to 5000 of each to | |
121 start doing a good job. | |
122 | |
123 To use bogofilter, add the following .procmailrc recipes which you can also | |
124 find in the bogofilter man page: | |
125 | |
126 # Bogofilter | |
127 :0fw | |
128 | bogofilter -u -e -p | |
129 | |
130 :0 | |
131 * ^X-Bogosity: Yes, tests=bogofilter | |
132 $SPAM | |
133 | |
134 Bogofilter continues to feed the messages it classifies back into its | |
135 database. Occasionally it misses, and those messages need to be reclassified. | |
136 MH-E can do this for you. Use \\[mh-junk-blacklist] to reclassify messages in | |
137 your +inbox as spam, and \\[mh-junk-whitelist] to reclassify messages in your | |
138 spambox as good messages." | |
139 (unless mh-bogofilter-executable | |
140 (error "Couldn't find the bogofilter executable")) | |
141 (let ((msg-file (mh-msg-filename msg mh-current-folder))) | |
142 (call-process mh-bogofilter-executable msg-file 0 nil "-Ns"))) | |
143 | |
144 (defun mh-bogofilter-whitelist (msg) | |
145 "Reinstate incorrectly filtered MSG. | |
146 Train bogofilter to think of the message as non-spam." | |
147 (unless mh-bogofilter-executable | |
148 (error "Couldn't find the bogofilter executable")) | |
149 (let ((msg-file (mh-msg-filename msg mh-current-folder))) | |
150 (call-process mh-bogofilter-executable msg-file 0 nil "-Sn"))) | |
151 | |
152 | |
153 | |
154 ;; Spamprobe Interface | |
155 | |
156 (defvar mh-spamprobe-executable (executable-find "spamprobe")) | |
157 | |
158 (defun mh-spamprobe-blacklist (msg) | |
159 "Classify MSG as spam. | |
160 Tell spamprobe that the message is spam. | |
161 | |
162 Spamprobe is a Bayesian spam filtering program. More info about the program can | |
163 be found at: | |
164 http://spamprobe.sourceforge.net | |
165 | |
166 Here is a procmail recipe that stores incoming spam mail into the folder +spam | |
167 and good mail in /home/user/Mail/mdrop/mbox. This recipe is provided as an | |
168 example in the spamprobe man page. | |
169 | |
170 PATH=/bin:/usr/bin:/usr/local/bin | |
171 DEFAULT=/home/user/Mail/mdrop/mbox | |
172 SPAM=/home/user/Mail/spam/. | |
173 | |
174 # Spamprobe filtering | |
175 :0 | |
176 SCORE=| spamprobe receive | |
177 :0 wf | |
178 | formail -I \"X-SpamProbe: $SCORE\" | |
179 :0 a: | |
180 *^X-SpamProbe: SPAM | |
181 $SPAM | |
182 | |
183 Occasionally some good mail gets misclassified as spam. You can use | |
184 \\[mh-junk-whitelist] to reclassify that as good mail." | |
185 (unless mh-spamprobe-executable | |
186 (error "Couldn't find the spamprobe executable")) | |
187 (let ((msg-file (mh-msg-filename msg mh-current-folder))) | |
188 (call-process mh-spamprobe-executable msg-file 0 nil "spam"))) | |
189 | |
190 (defun mh-spamprobe-whitelist (msg) | |
191 "Reinstate incorrectly filtered MSG. | |
192 Train spamprobe to think of the message as non-spam." | |
193 (unless mh-spamprobe-executable | |
194 (error "Couldn't find the spamprobe executable")) | |
195 (let ((msg-file (mh-msg-filename msg mh-current-folder))) | |
196 (call-process mh-spamprobe-executable msg-file 0 nil "good"))) | |
197 | |
198 | |
199 | |
200 ;; Spamassassin Interface | |
201 | |
202 (defvar mh-spamassassin-executable (executable-find "spamassassin")) | |
203 (defvar mh-sa-learn-executable (executable-find "sa-learn")) | |
204 | |
205 (defun mh-spamassassin-blacklist (msg) | |
206 "Blacklist MSG. | |
207 This is done by sending the message to Razor and by appending the sender to | |
208 ~/.spamassassin/user_prefs in a blacklist_from rule. If sa-learn is available, | |
209 the message is also recategorized as spam. | |
210 | |
211 Spamassassin is an excellent spam filter. For more information, see: | |
212 http://spamassassin.org/. | |
213 | |
214 I ran \"spamassassin -t\" on every mail message in my archive and ran an | |
215 analysis in Gnumeric to find that the standard deviation of good mail | |
216 scored under 5 (coincidentally, the spamassassin default for \"spam\"). | |
217 | |
218 Furthermore, I observed that there weren't any messages with a score of 8 | |
219 or more that were interesting, so I added a couple of points to be | |
220 conservative and send any message with a score of 10 or more down the | |
221 drain. You might want to use a score of 12 or 13 to be really conservative. | |
222 I have found that this really decreases the amount of junk to review. | |
223 | |
224 Messages with a score of 5-9 are set aside for later review. The major | |
225 weakness of rules-based filters is a plethora of false positives\; I catch one | |
226 or two legitimate messages in here a week, so it is worthwhile to check. | |
227 | |
228 You might choose to do this analysis yourself to pick a good score for | |
229 deleting spam sight unseen, or you might pick a score out of a hat, or you | |
230 might choose to be very conservative and not delete any messages at all. | |
231 | |
232 Based upon this discussion, here is what the associated ~/.procmailrc | |
233 entries look like. These rules appear before my list filters so that spam | |
234 sent to mailing lists gets pruned too. | |
235 | |
236 # | |
237 # Spam | |
238 # | |
239 :0fw | |
240 | spamc | |
241 | |
242 # Anything with a spam level of 10 or more is junked immediately. | |
243 :0: | |
244 * ^X-Spam-Level: .......... | |
245 /dev/null | |
246 | |
247 :0 | |
248 * ^X-Spam-Status: Yes | |
249 $SPAM | |
250 | |
251 If you don't use \"spamc\", use \"spamassassin -P -a\". | |
252 | |
253 A handful of spam does find its way into +inbox. In this case, use | |
254 \\[mh-junk-blacklist] to add a \"blacklist_from\" line to | |
255 ~/.spamassassin/user_prefs, delete the message, and send the message to the | |
256 Razor, so that others might not see this spam. | |
257 | |
258 Over time, you see some patterns in the blacklisted addresses and can | |
259 replace several lines with wildcards. For example, it is clear that High | |
260 Speed Media is the biggest bunch of jerks on the Net. Here are some of the | |
261 entries I have for them, and the list continues to grow. | |
262 | |
263 blacklist_from *@*-hsm-*.com | |
264 blacklist_from *@*182*643*.com | |
265 blacklist_from *@*antarhsm*.com | |
266 blacklist_from *@*h*speed* | |
267 blacklist_from *@*hsm*182*.com | |
268 blacklist_from *@*hsm*643*.com | |
269 blacklist_from *@*hsmridi2983cslt227.com | |
270 blacklist_from *@*list*hsm*.com | |
271 blacklist_from *@h*s*media* | |
272 blacklist_from *@hsmdrct.com | |
273 blacklist_from *@hsmridi2983csltsite.com | |
274 | |
275 The function `mh-spamassassin-identify-spammers' is provided that shows the | |
276 frequency counts of the host and domain names in your blacklist_from | |
277 entries. This can be helpful when editing the blacklist_from entries. | |
278 | |
279 In versions of spamassassin (2.50 and on) that support a Bayesian classifier, | |
280 \\[mh-junk-blacklist] uses the sa-learn program to recategorize the message as | |
281 spam. Neither MH-E, nor spamassassin, rebuilds the database after adding | |
282 words, so you will need to run \"sa-learn --rebuild\" periodically. This can | |
283 be done by adding the following to your crontab: | |
284 | |
285 0 * * * * sa-learn --rebuild > /dev/null 2>&1" | |
286 (unless mh-spamassassin-executable | |
287 (error "Couldn't find the spamassassin executable")) | |
288 (let ((current-folder mh-current-folder) | |
289 (msg-file (mh-msg-filename msg mh-current-folder)) | |
290 (sender)) | |
291 (save-excursion | |
292 (message "Giving this message the Razor...") | |
293 (mh-truncate-log-buffer) | |
294 (call-process mh-spamassassin-executable msg-file mh-log-buffer nil | |
295 "--report" "--remove-from-whitelist") | |
296 (when mh-sa-learn-executable | |
297 (message "Recategorizing this message as spam...") | |
298 (call-process mh-sa-learn-executable msg-file mh-log-buffer nil | |
56406
d36b00b98db0
Upgraded to MH-E version 7.4.4.
Bill Wohler <wohler@newt.com>
parents:
52401
diff
changeset
|
299 "--single" "--spam" "--local" "--no-rebuild")) |
50702 | 300 (message "Blacklisting address...") |
301 (set-buffer (get-buffer-create mh-temp-buffer)) | |
302 (erase-buffer) | |
303 (call-process (expand-file-name mh-scan-prog mh-progs) nil t nil | |
304 (format "%s" msg) current-folder | |
305 "-format" "%<(mymbox{from})%|%(addr{from})%>") | |
306 (goto-char (point-min)) | |
307 (if (search-forward-regexp "^\\(.+\\)$" nil t) | |
308 (progn | |
309 (setq sender (match-string 0)) | |
310 (mh-spamassassin-add-rule "blacklist_from" sender) | |
311 (message "Blacklisting address...done")) | |
312 (message "Blacklisting address...not done (from my address)"))))) | |
313 | |
314 (defun mh-spamassassin-whitelist (msg) | |
315 "Whitelist MSG. | |
316 Add a whitelist_from rule to the ~/.spamassassin/user_prefs file. If sa-learn | |
317 is available, then the message is recategorized as ham." | |
318 (unless mh-spamassassin-executable | |
319 (error "Couldn't find the spamassassin executable")) | |
320 (let ((msg-file (mh-msg-filename msg mh-current-folder)) | |
321 (show-buffer (get-buffer mh-show-buffer)) | |
322 from) | |
323 (save-excursion | |
324 (set-buffer (get-buffer-create mh-temp-buffer)) | |
325 (erase-buffer) | |
326 (message "Removing spamassassin markup from message...") | |
327 (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil | |
328 "--remove-markup") | |
329 (if show-buffer | |
330 (kill-buffer show-buffer)) | |
331 (write-file msg-file) | |
332 (when mh-sa-learn-executable | |
333 (message "Recategorizing this message as ham...") | |
334 (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil | |
335 "--single" "--ham" "--local" "--no-rebuild")) ; one flag per argument, as in the blacklist call | |
336 (message "Whitelisting address...") | |
337 (setq from (car (ietf-drums-parse-address (mh-get-header-field "From:")))) | |
338 (kill-buffer nil) | |
339 (unless (equal from "") | |
340 (mh-spamassassin-add-rule "whitelist_from" from)) | |
341 (message "Whitelisting address...done")))) | |
342 | |
343 (defun mh-spamassassin-add-rule (rule body) | |
344 "Add a new rule to ~/.spamassassin/user_prefs. | |
345 The name of the rule is RULE and its body is BODY." | |
346 (save-window-excursion | |
347 (let* ((line (format "%s\t%s\n" rule body)) | |
348 (case-fold-search t) | |
349 (file (expand-file-name "~/.spamassassin/user_prefs")) | |
350 (buffer-exists (find-buffer-visiting file))) | |
351 (find-file file) | |
352 (if (not (search-forward (format "\n%s" line) nil t)) | |
353 (progn | |
354 (goto-char (point-max)) | |
355 (insert (if (bolp) "" "\n") line) | |
356 (save-buffer))) | |
357 (if (not buffer-exists) | |
358 (kill-buffer nil))))) | |
359 | |
360 (defun mh-spamassassin-identify-spammers () | |
361 "Identifies spammers who are repeat offenders. | |
362 | |
363 For each blacklist_from entry from the last blank line of | |
364 ~/.spamassassin/user_prefs to the end of the file, a list of host and domain | |
365 names along with their frequency counts is displayed. This information can be | |
366 used to replace multiple blacklist_from entries with a single wildcard entry | |
367 such as: | |
368 | |
369 blacklist_from *@*amazingoffersdirect2u.com" | |
370 (interactive) | |
371 (let* ((file (expand-file-name "~/.spamassassin/user_prefs")) | |
372 (domains (make-hash-table :test 'equal))) | |
373 (find-file file) | |
374 ;; Only consider entries between last blank line and end of file. | |
375 (goto-char (1- (point-max))) | |
376 (search-backward-regexp "^$") | |
377 ;; Perform frequency count. | |
378 (save-excursion | |
379 (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$" | |
380 nil t) | |
381 (let ((host (match-string 2)) | |
382 value) | |
383 ;; Remove top-level-domain from hostname. | |
384 (setq host (cdr (reverse (split-string host "\\.")))) | |
385 ;; Add counts for each host and domain part. | |
386 (while host | |
387 (setq value (gethash (car host) domains)) | |
388 (puthash (car host) (1+ (if (not value) 0 value)) domains) | |
389 (setq host (cdr host)))))) | |
390 | |
391 ;; Output | |
392 (delete-other-windows) | |
393 (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*")) | |
394 (erase-buffer) | |
395 (maphash '(lambda (key value) "" | |
396 (if (> value 2) | |
397 (insert (format "%s %s\n" key value)))) | |
398 domains) | |
399 (sort-numeric-fields 2 (point-min) (point-max)) | |
400 (reverse-region (point-min) (point-max)) | |
401 (goto-char (point-min)))) | |
402 | |
403 (provide 'mh-junk) | |
404 | |
405 ;;; Local Variables: | |
406 ;;; indent-tabs-mode: nil | |
407 ;;; sentence-end-double-space: nil | |
408 ;;; End: | |
409 | |
52401 | 410 ;;; arch-tag: 603335f1-77ff-4306-8828-5d3dad51abe1 |
50702 | 411 ;;; mh-junk.el ends here |