annotate lisp/gnus/spam-stat.el @ 95729:7aa7574f6edd

*** empty log message ***
author Jason Rumney <jasonr@gnu.org>
date Mon, 09 Jun 2008 22:06:21 +0000
parents f42ef85caf91
children a9dc0e7c3f2b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
1 ;;; spam-stat.el --- detecting spam based on statistics
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
2
79708
1cb31606209f Add 2008 to copyright years.
Glenn Morris <rgm@gnu.org>
parents: 78224
diff changeset
3 ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
4
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
5 ;; Author: Alex Schroeder <alex@gnu.org>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
6 ;; Keywords: network
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
7 ;; URL: http://www.emacswiki.org/cgi-bin/wiki.pl?SpamStat
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
8
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
9 ;; This file is part of GNU Emacs.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
10
94662
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
11 ;; GNU Emacs is free software: you can redistribute it and/or modify
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
12 ;; it under the terms of the GNU General Public License as published by
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
13 ;; the Free Software Foundation, either version 3 of the License, or
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
14 ;; (at your option) any later version.
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
15
94662
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
16 ;; GNU Emacs is distributed in the hope that it will be useful,
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
19 ;; GNU General Public License for more details.
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
20
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
21 ;; You should have received a copy of the GNU General Public License
94662
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
22 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
23
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
24 ;;; Commentary:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
25
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
26 ;; This implements spam analysis according to Paul Graham in "A Plan
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
27 ;; for Spam". The basis for all this is a statistical distribution of
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
28 ;; words for your spam and non-spam mails. We need this information
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
29 ;; in a hash-table so that the analysis can use the information when
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
30 ;; looking at your mails. Therefore, before you begin, you need tons
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
31 ;; of mails (Graham uses 4000 non-spam and 4000 spam mails for his
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
32 ;; experiments).
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
33 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
34 ;; The main interface to using spam-stat, are the following functions:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
35 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
36 ;; `spam-stat-buffer-is-spam' -- called in a buffer, that buffer is
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
37 ;; considered to be a new spam mail; use this for new mail that has
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
38 ;; not been processed before
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
39 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
40 ;; `spam-stat-buffer-is-non-spam' -- called in a buffer, that buffer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
41 ;; is considered to be a new non-spam mail; use this for new mail that
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
42 ;; has not been processed before
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
43 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
44 ;; `spam-stat-buffer-change-to-spam' -- called in a buffer, that
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
45 ;; buffer is no longer considered to be normal mail but spam; use this
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
46 ;; to change the status of a mail that has already been processed as
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
47 ;; non-spam
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
48 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
49 ;; `spam-stat-buffer-change-to-non-spam' -- called in a buffer, that
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
50 ;; buffer is no longer considered to be spam but normal mail; use this
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
51 ;; to change the status of a mail that has already been processed as
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
52 ;; spam
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
53 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
54 ;; `spam-stat-save' -- save the hash table to the file; the filename
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
55 ;; used is stored in the variable `spam-stat-file'
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
56 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
57 ;; `spam-stat-load' -- load the hash table from a file; the filename
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
58 ;; used is stored in the variable `spam-stat-file'
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
59 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
60 ;; `spam-stat-score-word' -- return the spam score for a word
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
61 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
62 ;; `spam-stat-score-buffer' -- return the spam score for a buffer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
63 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
64 ;; `spam-stat-split-fancy' -- for fancy mail splitting; add
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
65 ;; the rule (: spam-stat-split-fancy) to `nnmail-split-fancy'
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
66 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
67 ;; This requires the following in your ~/.gnus file:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
68 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
69 ;; (require 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
70 ;; (spam-stat-load)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
71
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
72 ;;; Testing:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
73
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
74 ;; Typical test will involve calls to the following functions:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
75 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
76 ;; Reset: (spam-stat-reset)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
77 ;; Learn spam: (spam-stat-process-spam-directory "~/Mail/mail/spam")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
78 ;; Learn non-spam: (spam-stat-process-non-spam-directory "~/Mail/mail/misc")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
79 ;; Save table: (spam-stat-save)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
80 ;; File size: (nth 7 (file-attributes spam-stat-file))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
81 ;; Number of words: (hash-table-count spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
82 ;; Test spam: (spam-stat-test-directory "~/Mail/mail/spam")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
83 ;; Test non-spam: (spam-stat-test-directory "~/Mail/mail/misc")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
84 ;; Reduce table size: (spam-stat-reduce-size)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
85 ;; Save table: (spam-stat-save)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
86 ;; File size: (nth 7 (file-attributes spam-stat-file))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
87 ;; Number of words: (hash-table-count spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
88 ;; Test spam: (spam-stat-test-directory "~/Mail/mail/spam")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
89 ;; Test non-spam: (spam-stat-test-directory "~/Mail/mail/misc")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
90
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
91 ;;; Dictionary Creation:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
92
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
93 ;; Typically, you will filter away mailing lists etc. using specific
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
94 ;; rules in `nnmail-split-fancy'. Somewhere among these rules, you
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
95 ;; will filter spam. Here is how you would create your dictionary:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
96
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
97 ;; Reset: (spam-stat-reset)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
98 ;; Learn spam: (spam-stat-process-spam-directory "~/Mail/mail/spam")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
99 ;; Learn non-spam: (spam-stat-process-non-spam-directory "~/Mail/mail/misc")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
100 ;; Repeat for any other non-spam group you need...
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
101 ;; Reduce table size: (spam-stat-reduce-size)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
102 ;; Save table: (spam-stat-save)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
103
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
104 ;;; Todo:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
105
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
106 ;; Speed it up. Integrate with Gnus such that it uses spam and expiry
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
107 ;; marks to call the appropriate functions when leaving the summary
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
108 ;; buffer and saves the hash table when leaving Gnus. More testing:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
109 ;; More mails, disabling SpamAssassin, double checking algorithm, find
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
110 ;; improved algorithm.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
111
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
112 ;;; Thanks:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
113
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
114 ;; Ted Zlatanov <tzz@lifelogs.com>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
115 ;; Jesper Harder <harder@myrealbox.com>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
116 ;; Dan Schmidt <dfan@dfan.org>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
117 ;; Lasse Rasinen <lrasinen@iki.fi>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
118 ;; Milan Zamazal <pdm@zamazal.org>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
119
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
120
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
121
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
122 ;;; Code:
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
123 (require 'mail-parse)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
124
65284
6e266fbfe59b (gnus-original-article-buffer): Add defvar.
Juanma Barranquero <lekktu@gmail.com>
parents: 64754
diff changeset
125 (defvar gnus-original-article-buffer)
6e266fbfe59b (gnus-original-article-buffer): Add defvar.
Juanma Barranquero <lekktu@gmail.com>
parents: 64754
diff changeset
126
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
127 (defgroup spam-stat nil
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
128 "Statistical spam detection for Emacs.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
129 Use the functions to build a dictionary of words and their statistical
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
130 distribution in spam and non-spam mails. Then use a function to determine
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
131 whether a buffer contains spam or not."
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 57856
diff changeset
132 :version "22.1"
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
133 :group 'gnus)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
134
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
135 (defcustom spam-stat-file "~/.spam-stat.el"
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
136 "File used to save and load the dictionary.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
137 See `spam-stat-to-hash-table' for the format of the file."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
138 :type 'file
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
139 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
140
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
141 (defcustom spam-stat-install-hooks t
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
142 "Whether spam-stat should install its hooks in Gnus.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
143 This is set to nil if you use spam-stat through spam.el."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
144 :type 'boolean
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
145 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
146
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
147 (defcustom spam-stat-unknown-word-score 0.2
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
148 "The score to use for unknown words.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
149 Also used for words that don't appear often enough."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
150 :type 'number
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
151 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
152
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
153 (defcustom spam-stat-max-word-length 15
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
154 "Only words shorter than this will be considered."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
155 :type 'integer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
156 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
157
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
158 (defcustom spam-stat-max-buffer-length 10240
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
159 "Only the beginning of buffers will be analyzed.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
160 This variable says how many characters this will be."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
161 :type 'integer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
162 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
163
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
164 (defcustom spam-stat-split-fancy-spam-group "mail.spam"
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
165 "Name of the group where spam should be stored.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
166 If `spam-stat-split-fancy' is used in fancy splitting rules. Has
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
167 no effect when spam-stat is invoked through spam.el."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
168 :type 'string
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
169 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
170
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
171 (defcustom spam-stat-split-fancy-spam-threshold 0.9
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
172 "Spam score threshold in spam-stat-split-fancy."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
173 :type 'number
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
174 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
175
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
176 (defcustom spam-stat-washing-hook nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
177 "Hook applied to each message before analysis."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
178 :type 'hook
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
179 :group 'spam-stat)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
180
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
181 (defcustom spam-stat-score-buffer-user-functions nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
182 "List of additional scoring functions.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
183 Called one by one on the buffer.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
184
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
185 If all of these functions return non-nil answers, these numerical
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
186 answers are added to the computed spam stat score on the buffer. If
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
187 you defun such functions, make sure they don't return the buffer in a
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
188 narrowed state or such: use, for example, `save-excursion'. Each of
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
189 your functions is also passed the initial spam-stat score which might
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
190 aid in your scoring.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
191
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
192 Also be careful when defining such functions. If they take a long
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
193 time, they will slow down your mail splitting. Thus, if the buffer is
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
194 large, don't forget to use smaller regions, by wrapping your work in,
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
195 say, `with-spam-stat-max-buffer-size'."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
196 :type '(repeat sexp)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
197 :group 'spam-stat)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
198
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
199 (defcustom spam-stat-process-directory-age 90
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
200 "Max. age of files to be processed in directory, in days.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
201 When using `spam-stat-process-spam-directory' or
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
202 `spam-stat-process-non-spam-directory', only files that have
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
203 been touched in this many days will be considered. Without
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
204 this filter, re-training spam-stat with several thousand messages
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
205 will start to take a very long time."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
206 :type 'number
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
207 :group 'spam-stat)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
208
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
209 (defvar spam-stat-last-saved-at nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
210 "Time stamp of last change of spam-stat-file on this run")
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
211
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
212 (defvar spam-stat-syntax-table
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
213 (let ((table (copy-syntax-table text-mode-syntax-table)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
214 (modify-syntax-entry ?- "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
215 (modify-syntax-entry ?_ "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
216 (modify-syntax-entry ?. "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
217 (modify-syntax-entry ?! "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
218 (modify-syntax-entry ?? "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
219 (modify-syntax-entry ?+ "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
220 table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
221 "Syntax table used when processing mails for statistical analysis.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
222 The important part is which characters are word constituents.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
223
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
224 (defvar spam-stat-dirty nil
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
225 "Whether the spam-stat database needs saving.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
226
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
227 (defvar spam-stat-buffer nil
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
228 "Buffer to use for scoring while splitting.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
229 This is set by hooking into Gnus.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
230
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
231 (defvar spam-stat-buffer-name " *spam stat buffer*"
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
232 "Name of the `spam-stat-buffer'.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
233
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
234 (defvar spam-stat-coding-system
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
235 (if (mm-coding-system-p 'emacs-mule) 'emacs-mule 'raw-text)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
236 "Coding system used for `spam-stat-file'.")
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
237
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
238 ;; Hooking into Gnus
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
239
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
240 (defun spam-stat-store-current-buffer ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
241 "Store a copy of the current buffer in `spam-stat-buffer'."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
242 (let ((buf (current-buffer)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
243 (with-current-buffer (get-buffer-create spam-stat-buffer-name)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
244 (erase-buffer)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
245 (insert-buffer-substring buf)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
246 (setq spam-stat-buffer (current-buffer)))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
247
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
248 (defun spam-stat-store-gnus-article-buffer ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
249 "Store a copy of the current article in `spam-stat-buffer'.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
250 This uses `gnus-article-buffer'."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
251 (with-current-buffer gnus-original-article-buffer
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
252 (spam-stat-store-current-buffer)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
253
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
254 ;; Data -- not using defstruct in order to save space and time
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
255
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
256 (defvar spam-stat (make-hash-table :test 'equal)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
257 "Hash table used to store the statistics.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
258 Use `spam-stat-load' to load the file.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
259 Every word is used as a key in this table. The value is a vector.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
260 Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good',
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
261 `spam-stat-bad', and `spam-stat-score' to access this vector.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
262
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
263 (defvar spam-stat-ngood 0
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
264 "The number of good mails in the dictionary.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
265
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
266 (defvar spam-stat-nbad 0
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
267 "The number of bad mails in the dictionary.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
268
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
269 (defvar spam-stat-error-holder nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
270 "A holder for condition-case errors while scoring buffers.")
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
271
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
272 (defsubst spam-stat-good (entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
273 "Return the number of times this word belongs to good mails."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
274 (aref entry 0))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
275
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
276 (defsubst spam-stat-bad (entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
277 "Return the number of times this word belongs to bad mails."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
278 (aref entry 1))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
279
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
280 (defsubst spam-stat-score (entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
281 "Set the score of this word."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
282 (if entry
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
283 (aref entry 2)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
284 spam-stat-unknown-word-score))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
285
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
286 (defsubst spam-stat-set-good (entry value)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
287 "Set the number of times this word belongs to good mails."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
288 (aset entry 0 value))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
289
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
290 (defsubst spam-stat-set-bad (entry value)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
291 "Set the number of times this word belongs to bad mails."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
292 (aset entry 1 value))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
293
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
294 (defsubst spam-stat-set-score (entry value)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
295 "Set the score of this word."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
296 (aset entry 2 value))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
297
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
298 (defsubst spam-stat-make-entry (good bad)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
299 "Return a vector with the given properties."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
300 (let ((entry (vector good bad nil)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
301 (spam-stat-set-score entry (spam-stat-compute-score entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
302 entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
303
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
304 ;; Computing
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
305
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
306 (defun spam-stat-compute-score (entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
307 "Compute the score of this word. 1.0 means spam."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
308 ;; promote all numbers to floats for the divisions
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
309 (let* ((g (* 2.0 (spam-stat-good entry)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
310 (b (float (spam-stat-bad entry))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
311 (cond ((< (+ g b) 5)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
312 .2)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
313 ((= 0 spam-stat-ngood)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
314 .99)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
315 ((= 0 spam-stat-nbad)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
316 .01)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
317 (t
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
318 (max .01
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
319 (min .99 (/ (/ b spam-stat-nbad)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
320 (+ (/ g spam-stat-ngood)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
321 (/ b spam-stat-nbad)))))))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
322
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
323 ;; Parsing
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
324
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
325 (defmacro with-spam-stat-max-buffer-size (&rest body)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
326 "Narrow the buffer down to the first 4k characters, then evaluate BODY."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
327 `(save-restriction
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
328 (when (> (- (point-max)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
329 (point-min))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
330 spam-stat-max-buffer-length)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
331 (narrow-to-region (point-min)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
332 (+ (point-min) spam-stat-max-buffer-length)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
333 ,@body))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
334
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
335 (defun spam-stat-buffer-words ()
68931
d7445550220c (spam-stat-buffer-words): Fix typo in docstring.
Juanma Barranquero <lekktu@gmail.com>
parents: 68633
diff changeset
336 "Return a hash table of words and number of occurrences in the buffer."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
337 (run-hooks 'spam-stat-washing-hook)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
338 (with-spam-stat-max-buffer-size
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
339 (with-syntax-table spam-stat-syntax-table
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
340 (goto-char (point-min))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
341 (let ((result (make-hash-table :test 'equal))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
342 word count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
343 (while (re-search-forward "\\w+" nil t)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
344 (setq word (match-string-no-properties 0)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
345 count (1+ (gethash word result 0)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
346 (when (< (length word) spam-stat-max-word-length)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
347 (puthash word count result)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
348 result))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
349
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
350 (defun spam-stat-buffer-is-spam ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
351 "Consider current buffer to be a new spam mail."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
352 (setq spam-stat-nbad (1+ spam-stat-nbad))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
353 (maphash
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
354 (lambda (word count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
355 (let ((entry (gethash word spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
356 (if entry
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
357 (spam-stat-set-bad entry (+ count (spam-stat-bad entry)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
358 (setq entry (spam-stat-make-entry 0 count)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
359 (spam-stat-set-score entry (spam-stat-compute-score entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
360 (puthash word entry spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
361 (spam-stat-buffer-words))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
362 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
363
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
364 (defun spam-stat-buffer-is-non-spam ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
365 "Consider current buffer to be a new non-spam mail."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
366 (setq spam-stat-ngood (1+ spam-stat-ngood))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
367 (maphash
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
368 (lambda (word count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
369 (let ((entry (gethash word spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
370 (if entry
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
371 (spam-stat-set-good entry (+ count (spam-stat-good entry)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
372 (setq entry (spam-stat-make-entry count 0)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
373 (spam-stat-set-score entry (spam-stat-compute-score entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
374 (puthash word entry spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
375 (spam-stat-buffer-words))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
376 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
377
87227
8bb336490b3c (gnus-message): Autoload.
Glenn Morris <rgm@gnu.org>
parents: 85712
diff changeset
378 (autoload 'gnus-message "gnus-util")
8bb336490b3c (gnus-message): Autoload.
Glenn Morris <rgm@gnu.org>
parents: 85712
diff changeset
379
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
380 (defun spam-stat-buffer-change-to-spam ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
381 "Consider current buffer no longer normal mail but spam."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
382 (setq spam-stat-nbad (1+ spam-stat-nbad)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
383 spam-stat-ngood (1- spam-stat-ngood))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
384 (maphash
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
385 (lambda (word count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
386 (let ((entry (gethash word spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
387 (if (not entry)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
388 (gnus-message 8 "This buffer has unknown words in it")
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
389 (spam-stat-set-good entry (- (spam-stat-good entry) count))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
390 (spam-stat-set-bad entry (+ (spam-stat-bad entry) count))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
391 (spam-stat-set-score entry (spam-stat-compute-score entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
392 (puthash word entry spam-stat))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
393 (spam-stat-buffer-words))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
394 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
395
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
396 (defun spam-stat-buffer-change-to-non-spam ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
397 "Consider current buffer no longer spam but normal mail."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
398 (setq spam-stat-nbad (1- spam-stat-nbad)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
399 spam-stat-ngood (1+ spam-stat-ngood))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
400 (maphash
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
401 (lambda (word count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
402 (let ((entry (gethash word spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
403 (if (not entry)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
404 (gnus-message 8 "This buffer has unknown words in it")
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
405 (spam-stat-set-good entry (+ (spam-stat-good entry) count))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
406 (spam-stat-set-bad entry (- (spam-stat-bad entry) count))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
407 (spam-stat-set-score entry (spam-stat-compute-score entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
408 (puthash word entry spam-stat))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
409 (spam-stat-buffer-words))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
410 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
411
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
412 ;; Saving and Loading
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
413
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
414 (defun spam-stat-save (&optional force)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
415 "Save the `spam-stat' hash table as lisp file.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
416 With a prefix argument save unconditionally."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
417 (interactive "P")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
418 (when (or force spam-stat-dirty)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
419 (let ((coding-system-for-write spam-stat-coding-system))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
420 (with-temp-file spam-stat-file
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
421 (let ((standard-output (current-buffer))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
422 (font-lock-maximum-size 0))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
423 (insert (format ";-*- coding: %s; -*-\n" spam-stat-coding-system))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
424 (insert (format "(setq spam-stat-ngood %d spam-stat-nbad %d
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
425 spam-stat (spam-stat-to-hash-table '(" spam-stat-ngood spam-stat-nbad))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
426 (maphash (lambda (word entry)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
427 (prin1 (list word
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
428 (spam-stat-good entry)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
429 (spam-stat-bad entry))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
430 spam-stat)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
431 (insert ")))"))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
432 (message "Saved %s." spam-stat-file)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
433 (setq spam-stat-dirty nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
434 spam-stat-last-saved-at (nth 5 (file-attributes spam-stat-file)))))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
435
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
436 (defun spam-stat-load ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
437 "Read the `spam-stat' hash table from disk."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
438 ;; TODO: maybe we should warn the user if spam-stat-dirty is t?
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
439 (let ((coding-system-for-read spam-stat-coding-system))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
440 (cond (spam-stat-dirty (message "Spam stat not loaded: spam-stat-dirty t"))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
441 ((or (not (boundp 'spam-stat-last-saved-at))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
442 (null spam-stat-last-saved-at)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
443 (not (equal spam-stat-last-saved-at
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
444 (nth 5 (file-attributes spam-stat-file)))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
445 (progn
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
446 (load-file spam-stat-file)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
447 (setq spam-stat-dirty nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
448 spam-stat-last-saved-at
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
449 (nth 5 (file-attributes spam-stat-file)))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
450 (t (message "Spam stat file not loaded: no change in disk..")))))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
451
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
452 (defun spam-stat-to-hash-table (entries)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
453 "Turn list ENTRIES into a hash table and store as `spam-stat'.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
454 Every element in ENTRIES has the form \(WORD GOOD BAD) where WORD is
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
455 the word string, NGOOD is the number of good mails it has appeared in,
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
456 NBAD is the number of bad mails it has appeared in, GOOD is the number
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
457 of times it appeared in good mails, and BAD is the number of times it
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
458 has appeared in bad mails."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
459 (let ((table (make-hash-table :size (length entries)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
460 :test 'equal)))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
461 (mapc (lambda (l)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
462 (puthash (car l)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
463 (spam-stat-make-entry (nth 1 l) (nth 2 l))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
464 table))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
465 entries)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
466 table))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
467
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
468 (defun spam-stat-reset ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
469 "Reset `spam-stat' to an empty hash-table.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
470 This deletes all the statistics."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
471 (interactive)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
472 (setq spam-stat (make-hash-table :test 'equal)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
473 spam-stat-ngood 0
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
474 spam-stat-nbad 0)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
475 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
476
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
477 ;; Scoring buffers
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
478
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
479 (defvar spam-stat-score-data nil
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
480 "Raw data used in the last run of `spam-stat-score-buffer'.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
481
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
482 (defsubst spam-stat-score-word (word)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
483 "Return score for WORD.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
484 The default score for unknown words is stored in
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
485 `spam-stat-unknown-word-score'."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
486 (spam-stat-score (gethash word spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
487
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
488 (defun spam-stat-buffer-words-with-scores ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
489 "Process current buffer, return the 15 most conspicuous words.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
490 These are the words whose spam-stat differs the most from 0.5.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
491 The list returned contains elements of the form \(WORD SCORE DIFF),
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
492 where DIFF is the difference between SCORE and 0.5."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
493 (let (result word score)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
494 (maphash (lambda (word ignore)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
495 (setq score (spam-stat-score-word word)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
496 result (cons (list word score (abs (- score 0.5)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
497 result)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
498 (spam-stat-buffer-words))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
499 (setq result (sort result (lambda (a b) (< (nth 2 b) (nth 2 a)))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
500 (setcdr (nthcdr 14 result) nil)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
501 result))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
502
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
503 (defun spam-stat-score-buffer ()
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
504 "Return a score describing the spam-probability for this buffer.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
505 Add user supplied modifications if supplied."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
506 (interactive) ; helps in debugging.
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
507 (setq spam-stat-score-data (spam-stat-buffer-words-with-scores))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
508 (let* ((probs (mapcar 'cadr spam-stat-score-data))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
509 (prod (apply #'* probs))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
510 (score0
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
511 (/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
512 probs)))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
513 (score1s
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
514 (condition-case
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
515 spam-stat-error-holder
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
516 (spam-stat-score-buffer-user score0)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
517 (error nil)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
518 (ans
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
519 (if score1s (+ score0 score1s) score0)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
520 (when (interactive-p)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
521 (message "%S" ans))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
522 ans))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
523
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
524 (defun spam-stat-score-buffer-user (&rest args)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
525 (let* ((scores
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
526 (mapcar
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
527 (lambda (fn)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
528 (apply fn args))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
529 spam-stat-score-buffer-user-functions)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
530 (if (memq nil scores) nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
531 (apply #'+ scores))))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
532
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
533 (defun spam-stat-split-fancy ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
534 "Return the name of the spam group if the current mail is spam.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
535 Use this function on `nnmail-split-fancy'. If you are interested in
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
536 the raw data used for the last run of `spam-stat-score-buffer',
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
537 check the variable `spam-stat-score-data'."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
538 (condition-case spam-stat-error-holder
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
539 (progn
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
540 (set-buffer spam-stat-buffer)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
541 (goto-char (point-min))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
542 (when (> (spam-stat-score-buffer) spam-stat-split-fancy-spam-threshold)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
543 (when (boundp 'nnmail-split-trace)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
544 (mapc (lambda (entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
545 (push entry nnmail-split-trace))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
546 spam-stat-score-data))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
547 spam-stat-split-fancy-spam-group))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
548 (error (message "Error in spam-stat-split-fancy: %S" spam-stat-error-holder)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
549 nil)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
550
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
551 ;; Testing
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
552
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
553 (defun spam-stat-strip-xref ()
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
554 "Strip the the Xref header."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
555 (save-restriction
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
556 (mail-narrow-to-head)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
557 (when (re-search-forward "^Xref:.*\n" nil t)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
558 (delete-region (match-beginning 0) (match-end 0)))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
559
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
560 (defun spam-stat-process-directory (dir func)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
561 "Process all the regular files in directory DIR using function FUNC."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
562 (let* ((files (directory-files dir t "^[^.]"))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
563 (max (/ (length files) 100.0))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
564 (count 0))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
565 (with-temp-buffer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
566 (dolist (f files)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
567 (when (and (file-readable-p f)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
568 (file-regular-p f)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
569 (> (nth 7 (file-attributes f)) 0)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
570 (< (time-to-number-of-days (time-since (nth 5 (file-attributes f))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
571 spam-stat-process-directory-age))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
572 (setq count (1+ count))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
573 (message "Reading %s: %.2f%%" dir (/ count max))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
574 (insert-file-contents-literally f)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
575 (spam-stat-strip-xref)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
576 (funcall func)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
577 (erase-buffer))))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
578
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
579 (defun spam-stat-process-spam-directory (dir)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
580 "Process all the regular files in directory DIR as spam."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
581 (interactive "D")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
582 (spam-stat-process-directory dir 'spam-stat-buffer-is-spam))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
583
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
584 (defun spam-stat-process-non-spam-directory (dir)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
585 "Process all the regular files in directory DIR as non-spam."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
586 (interactive "D")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
587 (spam-stat-process-directory dir 'spam-stat-buffer-is-non-spam))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
588
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
589 (defun spam-stat-count ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
590 "Return size of `spam-stat'."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
591 (interactive)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
592 (hash-table-count spam-stat))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
593
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
594 (defun spam-stat-test-directory (dir &optional verbose)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
595 "Test all the regular files in directory DIR for spam.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
596 If the result is 1.0, then all files are considered spam.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
597 If the result is 0.0, non of the files is considered spam.
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
598 You can use this to determine error rates.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
599
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
600 If VERBOSE is non-nil display names of files detected as spam or
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
601 non-spam in a temporary buffer. If it is the symbol `ham',
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
602 display non-spam files; otherwise display spam files."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
603 (interactive "DDirectory: ")
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
604 (let* ((files (directory-files dir t "^[^.]"))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
605 display-files
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
606 buffer-score
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
607 (total (length files))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
608 (score 0.0); float
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
609 (max (/ total 100.0)); float
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
610 (count 0))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
611 (with-temp-buffer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
612 (dolist (f files)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
613 (when (and (file-readable-p f)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
614 (file-regular-p f)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
615 (> (nth 7 (file-attributes f)) 0))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
616 (setq count (1+ count))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
617 (message "Reading %.2f%%, score %.2f"
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
618 (/ count max) (/ score count))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
619 (insert-file-contents-literally f)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
620 (setq buffer-score (spam-stat-score-buffer))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
621 (when (> buffer-score 0.9)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
622 (setq score (1+ score)))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
623 (when verbose
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
624 (if (> buffer-score 0.9)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
625 (unless (eq verbose 'ham) (push f display-files))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
626 (when (eq verbose 'ham) (push f display-files))))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
627 (erase-buffer))))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
628 (when display-files
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
629 (with-output-to-temp-buffer "*spam-stat results*"
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
630 (dolist (file display-files)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
631 (princ file)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
632 (terpri))))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
633 (message "Final score: %d / %d = %f" score total (/ score total))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
634
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
635 ;; Shrinking the dictionary
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
636
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
637 (defun spam-stat-reduce-size (&optional count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
638 "Reduce the size of `spam-stat'.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
639 This removes all words that occur less than COUNT from the dictionary.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
640 COUNT defaults to 5"
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
641 (interactive)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
642 (setq count (or count 5))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
643 (maphash (lambda (key entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
644 (when (< (+ (spam-stat-good entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
645 (spam-stat-bad entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
646 count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
647 (remhash key spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
648 spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
649 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
650
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
651 (defun spam-stat-install-hooks-function ()
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
652 "Install the spam-stat function hooks."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
653 (interactive)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
654 (add-hook 'nnmail-prepare-incoming-message-hook
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
655 'spam-stat-store-current-buffer)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
656 (add-hook 'gnus-select-article-hook
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
657 'spam-stat-store-gnus-article-buffer))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
658
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
659 (when spam-stat-install-hooks
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
660 (spam-stat-install-hooks-function))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
661
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
662 (defun spam-stat-unload-hook ()
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
663 "Uninstall the spam-stat function hooks."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
664 (interactive)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
665 (remove-hook 'nnmail-prepare-incoming-message-hook
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
666 'spam-stat-store-current-buffer)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
667 (remove-hook 'gnus-select-article-hook
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
668 'spam-stat-store-gnus-article-buffer))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
669
57550
a19498f00f9a (spam-stat-unload-hook): Set as a variable w/ add-hook.
Richard M. Stallman <rms@gnu.org>
parents: 56927
diff changeset
670 (add-hook 'spam-stat-unload-hook 'spam-stat-unload-hook)
a19498f00f9a (spam-stat-unload-hook): Set as a variable w/ add-hook.
Richard M. Stallman <rms@gnu.org>
parents: 56927
diff changeset
671
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
672 (provide 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
673
93975
1e3a407766b9 Fix up comment convention on the arch-tag lines.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 87649
diff changeset
674 ;; arch-tag: ff1d2200-8ddb-42fb-bb7b-1b5e20448554
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
675 ;;; spam-stat.el ends here