annotate lisp/gnus/spam-stat.el @ 86315:35a46183236a

*** empty log message ***
author Juanma Barranquero <lekktu@gmail.com>
date Thu, 22 Nov 2007 12:26:48 +0000
parents a3c27999decb
children 8bb336490b3c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
1 ;;; spam-stat.el --- detecting spam based on statistics
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
2
75347
e3694f1cb928 Add 2007 to copyright years.
Glenn Morris <rgm@gnu.org>
parents: 68931
diff changeset
3 ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
4
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
5 ;; Author: Alex Schroeder <alex@gnu.org>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
6 ;; Keywords: network
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
7 ;; URL: http://www.emacswiki.org/cgi-bin/wiki.pl?SpamStat
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
8
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
9 ;; This file is part of GNU Emacs.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
10
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
11 ;; This is free software; you can redistribute it and/or modify it
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
12 ;; under the terms of the GNU General Public License as published by
78224
24202b793a08 Switch license to GPLv3 or later.
Glenn Morris <rgm@gnu.org>
parents: 75347
diff changeset
13 ;; the Free Software Foundation; either version 3, or (at your option)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
14 ;; any later version.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
15
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
16 ;; This is distributed in the hope that it will be useful, but WITHOUT
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
17 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
18 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
19 ;; License for more details.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
20
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
21 ;; You should have received a copy of the GNU General Public License
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
64085
18a818a2ee7c Update FSF's address.
Lute Kamstra <lute@gnu.org>
parents: 63419
diff changeset
23 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18a818a2ee7c Update FSF's address.
Lute Kamstra <lute@gnu.org>
parents: 63419
diff changeset
24 ;; Boston, MA 02110-1301, USA.
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
25
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
26 ;;; Commentary:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
27
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
28 ;; This implements spam analysis according to Paul Graham in "A Plan
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
29 ;; for Spam". The basis for all this is a statistical distribution of
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
30 ;; words for your spam and non-spam mails. We need this information
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
31 ;; in a hash-table so that the analysis can use the information when
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
32 ;; looking at your mails. Therefore, before you begin, you need tons
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
33 ;; of mails (Graham uses 4000 non-spam and 4000 spam mails for his
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
34 ;; experiments).
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
35 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
36 ;; The main interface to using spam-stat, are the following functions:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
37 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
38 ;; `spam-stat-buffer-is-spam' -- called in a buffer, that buffer is
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
39 ;; considered to be a new spam mail; use this for new mail that has
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
40 ;; not been processed before
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
41 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
42 ;; `spam-stat-buffer-is-non-spam' -- called in a buffer, that buffer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
43 ;; is considered to be a new non-spam mail; use this for new mail that
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
44 ;; has not been processed before
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
45 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
46 ;; `spam-stat-buffer-change-to-spam' -- called in a buffer, that
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
47 ;; buffer is no longer considered to be normal mail but spam; use this
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
48 ;; to change the status of a mail that has already been processed as
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
49 ;; non-spam
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
50 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
51 ;; `spam-stat-buffer-change-to-non-spam' -- called in a buffer, that
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
52 ;; buffer is no longer considered to be spam but normal mail; use this
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
53 ;; to change the status of a mail that has already been processed as
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
54 ;; spam
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
55 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
56 ;; `spam-stat-save' -- save the hash table to the file; the filename
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
57 ;; used is stored in the variable `spam-stat-file'
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
58 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
59 ;; `spam-stat-load' -- load the hash table from a file; the filename
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
60 ;; used is stored in the variable `spam-stat-file'
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
61 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
62 ;; `spam-stat-score-word' -- return the spam score for a word
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
63 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
64 ;; `spam-stat-score-buffer' -- return the spam score for a buffer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
65 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
66 ;; `spam-stat-split-fancy' -- for fancy mail splitting; add
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
67 ;; the rule (: spam-stat-split-fancy) to `nnmail-split-fancy'
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
68 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
69 ;; This requires the following in your ~/.gnus file:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
70 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
71 ;; (require 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
72 ;; (spam-stat-load)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
73
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
74 ;;; Testing:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
75
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
76 ;; Typical test will involve calls to the following functions:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
77 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
78 ;; Reset: (spam-stat-reset)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
79 ;; Learn spam: (spam-stat-process-spam-directory "~/Mail/mail/spam")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
80 ;; Learn non-spam: (spam-stat-process-non-spam-directory "~/Mail/mail/misc")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
81 ;; Save table: (spam-stat-save)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
82 ;; File size: (nth 7 (file-attributes spam-stat-file))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
83 ;; Number of words: (hash-table-count spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
84 ;; Test spam: (spam-stat-test-directory "~/Mail/mail/spam")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
85 ;; Test non-spam: (spam-stat-test-directory "~/Mail/mail/misc")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
86 ;; Reduce table size: (spam-stat-reduce-size)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
87 ;; Save table: (spam-stat-save)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
88 ;; File size: (nth 7 (file-attributes spam-stat-file))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
89 ;; Number of words: (hash-table-count spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
90 ;; Test spam: (spam-stat-test-directory "~/Mail/mail/spam")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
91 ;; Test non-spam: (spam-stat-test-directory "~/Mail/mail/misc")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
92
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
93 ;;; Dictionary Creation:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
94
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
95 ;; Typically, you will filter away mailing lists etc. using specific
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
96 ;; rules in `nnmail-split-fancy'. Somewhere among these rules, you
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
97 ;; will filter spam. Here is how you would create your dictionary:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
98
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
99 ;; Reset: (spam-stat-reset)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
100 ;; Learn spam: (spam-stat-process-spam-directory "~/Mail/mail/spam")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
101 ;; Learn non-spam: (spam-stat-process-non-spam-directory "~/Mail/mail/misc")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
102 ;; Repeat for any other non-spam group you need...
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
103 ;; Reduce table size: (spam-stat-reduce-size)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
104 ;; Save table: (spam-stat-save)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
105
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
106 ;;; Todo:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
107
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
108 ;; Speed it up. Integrate with Gnus such that it uses spam and expiry
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
109 ;; marks to call the appropriate functions when leaving the summary
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
110 ;; buffer and saves the hash table when leaving Gnus. More testing:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
111 ;; More mails, disabling SpamAssassin, double checking algorithm, find
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
112 ;; improved algorithm.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
113
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
114 ;;; Thanks:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
115
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
116 ;; Ted Zlatanov <tzz@lifelogs.com>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
117 ;; Jesper Harder <harder@myrealbox.com>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
118 ;; Dan Schmidt <dfan@dfan.org>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
119 ;; Lasse Rasinen <lrasinen@iki.fi>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
120 ;; Milan Zamazal <pdm@zamazal.org>
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
121
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
122
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
123
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
124 ;;; Code:
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
125 (require 'mail-parse)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
126
65284
6e266fbfe59b (gnus-original-article-buffer): Add defvar.
Juanma Barranquero <lekktu@gmail.com>
parents: 64754
diff changeset
127 (defvar gnus-original-article-buffer)
6e266fbfe59b (gnus-original-article-buffer): Add defvar.
Juanma Barranquero <lekktu@gmail.com>
parents: 64754
diff changeset
128
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
129 (defgroup spam-stat nil
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
130 "Statistical spam detection for Emacs.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
131 Use the functions to build a dictionary of words and their statistical
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
132 distribution in spam and non-spam mails. Then use a function to determine
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
133 whether a buffer contains spam or not."
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 57856
diff changeset
134 :version "22.1"
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
135 :group 'gnus)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
136
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
137 (defcustom spam-stat-file "~/.spam-stat.el"
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
138 "File used to save and load the dictionary.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
139 See `spam-stat-to-hash-table' for the format of the file."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
140 :type 'file
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
141 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
142
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
143 (defcustom spam-stat-install-hooks t
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
144 "Whether spam-stat should install its hooks in Gnus.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
145 This is set to nil if you use spam-stat through spam.el."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
146 :type 'boolean
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
147 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
148
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
149 (defcustom spam-stat-unknown-word-score 0.2
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
150 "The score to use for unknown words.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
151 Also used for words that don't appear often enough."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
152 :type 'number
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
153 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
154
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
155 (defcustom spam-stat-max-word-length 15
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
156 "Only words shorter than this will be considered."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
157 :type 'integer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
158 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
159
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
160 (defcustom spam-stat-max-buffer-length 10240
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
161 "Only the beginning of buffers will be analyzed.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
162 This variable says how many characters this will be."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
163 :type 'integer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
164 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
165
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
166 (defcustom spam-stat-split-fancy-spam-group "mail.spam"
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
167 "Name of the group where spam should be stored.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
168 If `spam-stat-split-fancy' is used in fancy splitting rules. Has
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
169 no effect when spam-stat is invoked through spam.el."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
170 :type 'string
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
171 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
172
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
173 (defcustom spam-stat-split-fancy-spam-threshold 0.9
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
174 "Spam score threshold in spam-stat-split-fancy."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
175 :type 'number
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
176 :group 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
177
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
178 (defcustom spam-stat-washing-hook nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
179 "Hook applied to each message before analysis."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
180 :type 'hook
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
181 :group 'spam-stat)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
182
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
183 (defcustom spam-stat-score-buffer-user-functions nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
184 "List of additional scoring functions.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
185 Called one by one on the buffer.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
186
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
187 If all of these functions return non-nil answers, these numerical
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
188 answers are added to the computed spam stat score on the buffer. If
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
189 you defun such functions, make sure they don't return the buffer in a
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
190 narrowed state or such: use, for example, `save-excursion'. Each of
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
191 your functions is also passed the initial spam-stat score which might
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
192 aid in your scoring.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
193
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
194 Also be careful when defining such functions. If they take a long
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
195 time, they will slow down your mail splitting. Thus, if the buffer is
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
196 large, don't forget to use smaller regions, by wrapping your work in,
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
197 say, `with-spam-stat-max-buffer-size'."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
198 :type '(repeat sexp)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
199 :group 'spam-stat)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
200
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
201 (defcustom spam-stat-process-directory-age 90
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
202 "Max. age of files to be processed in directory, in days.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
203 When using `spam-stat-process-spam-directory' or
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
204 `spam-stat-process-non-spam-directory', only files that have
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
205 been touched in this many days will be considered. Without
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
206 this filter, re-training spam-stat with several thousand messages
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
207 will start to take a very long time."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
208 :type 'number
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
209 :group 'spam-stat)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
210
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
211 (defvar spam-stat-last-saved-at nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
212 "Time stamp of last change of spam-stat-file on this run")
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
213
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
214 (defvar spam-stat-syntax-table
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
215 (let ((table (copy-syntax-table text-mode-syntax-table)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
216 (modify-syntax-entry ?- "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
217 (modify-syntax-entry ?_ "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
218 (modify-syntax-entry ?. "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
219 (modify-syntax-entry ?! "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
220 (modify-syntax-entry ?? "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
221 (modify-syntax-entry ?+ "w" table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
222 table)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
223 "Syntax table used when processing mails for statistical analysis.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
224 The important part is which characters are word constituents.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
225
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
226 (defvar spam-stat-dirty nil
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
227 "Whether the spam-stat database needs saving.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
228
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
229 (defvar spam-stat-buffer nil
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
230 "Buffer to use for scoring while splitting.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
231 This is set by hooking into Gnus.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
232
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
233 (defvar spam-stat-buffer-name " *spam stat buffer*"
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
234 "Name of the `spam-stat-buffer'.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
235
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
236 (defvar spam-stat-coding-system
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
237 (if (mm-coding-system-p 'emacs-mule) 'emacs-mule 'raw-text)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
238 "Coding system used for `spam-stat-file'.")
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
239
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
240 ;; Hooking into Gnus
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
241
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
242 (defun spam-stat-store-current-buffer ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
243 "Store a copy of the current buffer in `spam-stat-buffer'."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
244 (let ((buf (current-buffer)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
245 (with-current-buffer (get-buffer-create spam-stat-buffer-name)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
246 (erase-buffer)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
247 (insert-buffer-substring buf)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
248 (setq spam-stat-buffer (current-buffer)))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
249
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
250 (defun spam-stat-store-gnus-article-buffer ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
251 "Store a copy of the current article in `spam-stat-buffer'.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
252 This uses `gnus-article-buffer'."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
253 (with-current-buffer gnus-original-article-buffer
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
254 (spam-stat-store-current-buffer)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
255
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
256 ;; Data -- not using defstruct in order to save space and time
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
257
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
258 (defvar spam-stat (make-hash-table :test 'equal)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
259 "Hash table used to store the statistics.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
260 Use `spam-stat-load' to load the file.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
261 Every word is used as a key in this table. The value is a vector.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
262 Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good',
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
263 `spam-stat-bad', and `spam-stat-score' to access this vector.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
264
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
265 (defvar spam-stat-ngood 0
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
266 "The number of good mails in the dictionary.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
267
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
268 (defvar spam-stat-nbad 0
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
269 "The number of bad mails in the dictionary.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
270
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
271 (defvar spam-stat-error-holder nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
272 "A holder for condition-case errors while scoring buffers.")
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
273
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
274 (defsubst spam-stat-good (entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
275 "Return the number of times this word belongs to good mails."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
276 (aref entry 0))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
277
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
278 (defsubst spam-stat-bad (entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
279 "Return the number of times this word belongs to bad mails."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
280 (aref entry 1))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
281
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
282 (defsubst spam-stat-score (entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
283 "Set the score of this word."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
284 (if entry
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
285 (aref entry 2)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
286 spam-stat-unknown-word-score))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
287
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
288 (defsubst spam-stat-set-good (entry value)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
289 "Set the number of times this word belongs to good mails."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
290 (aset entry 0 value))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
291
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
292 (defsubst spam-stat-set-bad (entry value)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
293 "Set the number of times this word belongs to bad mails."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
294 (aset entry 1 value))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
295
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
296 (defsubst spam-stat-set-score (entry value)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
297 "Set the score of this word."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
298 (aset entry 2 value))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
299
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
300 (defsubst spam-stat-make-entry (good bad)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
301 "Return a vector with the given properties."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
302 (let ((entry (vector good bad nil)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
303 (spam-stat-set-score entry (spam-stat-compute-score entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
304 entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
305
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
306 ;; Computing
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
307
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
308 (defun spam-stat-compute-score (entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
309 "Compute the score of this word. 1.0 means spam."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
310 ;; promote all numbers to floats for the divisions
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
311 (let* ((g (* 2.0 (spam-stat-good entry)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
312 (b (float (spam-stat-bad entry))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
313 (cond ((< (+ g b) 5)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
314 .2)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
315 ((= 0 spam-stat-ngood)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
316 .99)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
317 ((= 0 spam-stat-nbad)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
318 .01)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
319 (t
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
320 (max .01
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
321 (min .99 (/ (/ b spam-stat-nbad)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
322 (+ (/ g spam-stat-ngood)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
323 (/ b spam-stat-nbad)))))))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
324
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
325 ;; Parsing
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
326
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
327 (defmacro with-spam-stat-max-buffer-size (&rest body)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
328 "Narrow the buffer down to the first 4k characters, then evaluate BODY."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
329 `(save-restriction
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
330 (when (> (- (point-max)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
331 (point-min))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
332 spam-stat-max-buffer-length)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
333 (narrow-to-region (point-min)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
334 (+ (point-min) spam-stat-max-buffer-length)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
335 ,@body))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
336
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
337 (defun spam-stat-buffer-words ()
68931
d7445550220c (spam-stat-buffer-words): Fix typo in docstring.
Juanma Barranquero <lekktu@gmail.com>
parents: 68633
diff changeset
338 "Return a hash table of words and number of occurrences in the buffer."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
339 (run-hooks 'spam-stat-washing-hook)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
340 (with-spam-stat-max-buffer-size
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
341 (with-syntax-table spam-stat-syntax-table
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
342 (goto-char (point-min))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
343 (let ((result (make-hash-table :test 'equal))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
344 word count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
345 (while (re-search-forward "\\w+" nil t)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
346 (setq word (match-string-no-properties 0)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
347 count (1+ (gethash word result 0)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
348 (when (< (length word) spam-stat-max-word-length)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
349 (puthash word count result)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
350 result))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
351
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
352 (defun spam-stat-buffer-is-spam ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
353 "Consider current buffer to be a new spam mail."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
354 (setq spam-stat-nbad (1+ spam-stat-nbad))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
355 (maphash
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
356 (lambda (word count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
357 (let ((entry (gethash word spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
358 (if entry
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
359 (spam-stat-set-bad entry (+ count (spam-stat-bad entry)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
360 (setq entry (spam-stat-make-entry 0 count)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
361 (spam-stat-set-score entry (spam-stat-compute-score entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
362 (puthash word entry spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
363 (spam-stat-buffer-words))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
364 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
365
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
366 (defun spam-stat-buffer-is-non-spam ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
367 "Consider current buffer to be a new non-spam mail."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
368 (setq spam-stat-ngood (1+ spam-stat-ngood))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
369 (maphash
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
370 (lambda (word count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
371 (let ((entry (gethash word spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
372 (if entry
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
373 (spam-stat-set-good entry (+ count (spam-stat-good entry)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
374 (setq entry (spam-stat-make-entry count 0)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
375 (spam-stat-set-score entry (spam-stat-compute-score entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
376 (puthash word entry spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
377 (spam-stat-buffer-words))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
378 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
379
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
380 (defun spam-stat-buffer-change-to-spam ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
381 "Consider current buffer no longer normal mail but spam."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
382 (setq spam-stat-nbad (1+ spam-stat-nbad)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
383 spam-stat-ngood (1- spam-stat-ngood))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
384 (maphash
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
385 (lambda (word count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
386 (let ((entry (gethash word spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
387 (if (not entry)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
388 (gnus-message 8 "This buffer has unknown words in it")
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
389 (spam-stat-set-good entry (- (spam-stat-good entry) count))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
390 (spam-stat-set-bad entry (+ (spam-stat-bad entry) count))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
391 (spam-stat-set-score entry (spam-stat-compute-score entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
392 (puthash word entry spam-stat))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
393 (spam-stat-buffer-words))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
394 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
395
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
396 (defun spam-stat-buffer-change-to-non-spam ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
397 "Consider current buffer no longer spam but normal mail."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
398 (setq spam-stat-nbad (1- spam-stat-nbad)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
399 spam-stat-ngood (1+ spam-stat-ngood))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
400 (maphash
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
401 (lambda (word count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
402 (let ((entry (gethash word spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
403 (if (not entry)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
404 (gnus-message 8 "This buffer has unknown words in it")
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
405 (spam-stat-set-good entry (+ (spam-stat-good entry) count))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
406 (spam-stat-set-bad entry (- (spam-stat-bad entry) count))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
407 (spam-stat-set-score entry (spam-stat-compute-score entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
408 (puthash word entry spam-stat))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
409 (spam-stat-buffer-words))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
410 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
411
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
412 ;; Saving and Loading
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
413
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
414 (defun spam-stat-save (&optional force)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
415 "Save the `spam-stat' hash table as lisp file.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
416 With a prefix argument save unconditionally."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
417 (interactive "P")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
418 (when (or force spam-stat-dirty)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
419 (let ((coding-system-for-write spam-stat-coding-system))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
420 (with-temp-file spam-stat-file
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
421 (let ((standard-output (current-buffer))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
422 (font-lock-maximum-size 0))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
423 (insert (format ";-*- coding: %s; -*-\n" spam-stat-coding-system))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
424 (insert (format "(setq spam-stat-ngood %d spam-stat-nbad %d
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
425 spam-stat (spam-stat-to-hash-table '(" spam-stat-ngood spam-stat-nbad))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
426 (maphash (lambda (word entry)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
427 (prin1 (list word
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
428 (spam-stat-good entry)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
429 (spam-stat-bad entry))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
430 spam-stat)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
431 (insert ")))"))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
432 (message "Saved %s." spam-stat-file)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
433 (setq spam-stat-dirty nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
434 spam-stat-last-saved-at (nth 5 (file-attributes spam-stat-file)))))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
435
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
436 (defun spam-stat-load ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
437 "Read the `spam-stat' hash table from disk."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
438 ;; TODO: maybe we should warn the user if spam-stat-dirty is t?
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
439 (let ((coding-system-for-read spam-stat-coding-system))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
440 (cond (spam-stat-dirty (message "Spam stat not loaded: spam-stat-dirty t"))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
441 ((or (not (boundp 'spam-stat-last-saved-at))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
442 (null spam-stat-last-saved-at)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
443 (not (equal spam-stat-last-saved-at
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
444 (nth 5 (file-attributes spam-stat-file)))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
445 (progn
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
446 (load-file spam-stat-file)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
447 (setq spam-stat-dirty nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
448 spam-stat-last-saved-at
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
449 (nth 5 (file-attributes spam-stat-file)))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
450 (t (message "Spam stat file not loaded: no change in disk..")))))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
451
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
452 (defun spam-stat-to-hash-table (entries)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
453 "Turn list ENTRIES into a hash table and store as `spam-stat'.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
454 Every element in ENTRIES has the form \(WORD GOOD BAD) where WORD is
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
455 the word string, NGOOD is the number of good mails it has appeared in,
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
456 NBAD is the number of bad mails it has appeared in, GOOD is the number
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
457 of times it appeared in good mails, and BAD is the number of times it
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
458 has appeared in bad mails."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
459 (let ((table (make-hash-table :size (length entries)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
460 :test 'equal)))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
461 (mapc (lambda (l)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
462 (puthash (car l)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
463 (spam-stat-make-entry (nth 1 l) (nth 2 l))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
464 table))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
465 entries)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
466 table))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
467
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
468 (defun spam-stat-reset ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
469 "Reset `spam-stat' to an empty hash-table.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
470 This deletes all the statistics."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
471 (interactive)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
472 (setq spam-stat (make-hash-table :test 'equal)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
473 spam-stat-ngood 0
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
474 spam-stat-nbad 0)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
475 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
476
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
477 ;; Scoring buffers
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
478
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
479 (defvar spam-stat-score-data nil
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
480 "Raw data used in the last run of `spam-stat-score-buffer'.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
481
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
482 (defsubst spam-stat-score-word (word)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
483 "Return score for WORD.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
484 The default score for unknown words is stored in
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
485 `spam-stat-unknown-word-score'."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
486 (spam-stat-score (gethash word spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
487
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
488 (defun spam-stat-buffer-words-with-scores ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
489 "Process current buffer, return the 15 most conspicuous words.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
490 These are the words whose spam-stat differs the most from 0.5.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
491 The list returned contains elements of the form \(WORD SCORE DIFF),
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
492 where DIFF is the difference between SCORE and 0.5."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
493 (let (result word score)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
494 (maphash (lambda (word ignore)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
495 (setq score (spam-stat-score-word word)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
496 result (cons (list word score (abs (- score 0.5)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
497 result)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
498 (spam-stat-buffer-words))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
499 (setq result (sort result (lambda (a b) (< (nth 2 b) (nth 2 a)))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
500 (setcdr (nthcdr 14 result) nil)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
501 result))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
502
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
503 (defun spam-stat-score-buffer ()
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
504 "Return a score describing the spam-probability for this buffer.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
505 Add user supplied modifications if supplied."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
506 (interactive) ; helps in debugging.
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
507 (setq spam-stat-score-data (spam-stat-buffer-words-with-scores))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
508 (let* ((probs (mapcar 'cadr spam-stat-score-data))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
509 (prod (apply #'* probs))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
510 (score0
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
511 (/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
512 probs)))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
513 (score1s
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
514 (condition-case
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
515 spam-stat-error-holder
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
516 (spam-stat-score-buffer-user score0)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
517 (error nil)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
518 (ans
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
519 (if score1s (+ score0 score1s) score0)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
520 (when (interactive-p)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
521 (message "%S" ans))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
522 ans))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
523
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
524 (defun spam-stat-score-buffer-user (&rest args)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
525 (let* ((scores
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
526 (mapcar
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
527 (lambda (fn)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
528 (apply fn args))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
529 spam-stat-score-buffer-user-functions)))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
530 (if (memq nil scores) nil
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
531 (apply #'+ scores))))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
532
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
533 (defun spam-stat-split-fancy ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
534 "Return the name of the spam group if the current mail is spam.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
535 Use this function on `nnmail-split-fancy'. If you are interested in
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
536 the raw data used for the last run of `spam-stat-score-buffer',
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
537 check the variable `spam-stat-score-data'."
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
538 (condition-case spam-stat-error-holder
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
539 (progn
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
540 (set-buffer spam-stat-buffer)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
541 (goto-char (point-min))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
542 (when (> (spam-stat-score-buffer) spam-stat-split-fancy-spam-threshold)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
543 (when (boundp 'nnmail-split-trace)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
544 (mapc (lambda (entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
545 (push entry nnmail-split-trace))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
546 spam-stat-score-data))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
547 spam-stat-split-fancy-spam-group))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
548 (error (message "Error in spam-stat-split-fancy: %S" spam-stat-error-holder)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
549 nil)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
550
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
551 ;; Testing
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
552
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
553 (defun spam-stat-strip-xref ()
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
554 "Strip the the Xref header."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
555 (save-restriction
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
556 (mail-narrow-to-head)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
557 (when (re-search-forward "^Xref:.*\n" nil t)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
558 (delete-region (match-beginning 0) (match-end 0)))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
559
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
560 (defun spam-stat-process-directory (dir func)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
561 "Process all the regular files in directory DIR using function FUNC."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
562 (let* ((files (directory-files dir t "^[^.]"))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
563 (max (/ (length files) 100.0))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
564 (count 0))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
565 (with-temp-buffer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
566 (dolist (f files)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
567 (when (and (file-readable-p f)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
568 (file-regular-p f)
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
569 (> (nth 7 (file-attributes f)) 0)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
570 (< (time-to-number-of-days (time-since (nth 5 (file-attributes f))))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
571 spam-stat-process-directory-age))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
572 (setq count (1+ count))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
573 (message "Reading %s: %.2f%%" dir (/ count max))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
574 (insert-file-contents-literally f)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
575 (spam-stat-strip-xref)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
576 (funcall func)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
577 (erase-buffer))))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
578
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
579 (defun spam-stat-process-spam-directory (dir)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
580 "Process all the regular files in directory DIR as spam."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
581 (interactive "D")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
582 (spam-stat-process-directory dir 'spam-stat-buffer-is-spam))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
583
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
584 (defun spam-stat-process-non-spam-directory (dir)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
585 "Process all the regular files in directory DIR as non-spam."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
586 (interactive "D")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
587 (spam-stat-process-directory dir 'spam-stat-buffer-is-non-spam))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
588
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
589 (defun spam-stat-count ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
590 "Return size of `spam-stat'."
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
591 (interactive)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
592 (hash-table-count spam-stat))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
593
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
594 (defun spam-stat-test-directory (dir &optional verbose)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
595 "Test all the regular files in directory DIR for spam.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
596 If the result is 1.0, then all files are considered spam.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
597 If the result is 0.0, non of the files is considered spam.
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
598 You can use this to determine error rates.
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
599
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
600 If VERBOSE is non-nil display names of files detected as spam or
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
601 non-spam in a temporary buffer. If it is the symbol `ham',
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
602 display non-spam files; otherwise display spam files."
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
603 (interactive "DDirectory: ")
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
604 (let* ((files (directory-files dir t "^[^.]"))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
605 display-files
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
606 buffer-score
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
607 (total (length files))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
608 (score 0.0); float
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
609 (max (/ total 100.0)); float
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
610 (count 0))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
611 (with-temp-buffer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
612 (dolist (f files)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
613 (when (and (file-readable-p f)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
614 (file-regular-p f)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
615 (> (nth 7 (file-attributes f)) 0))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
616 (setq count (1+ count))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
617 (message "Reading %.2f%%, score %.2f"
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
618 (/ count max) (/ score count))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
619 (insert-file-contents-literally f)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
620 (setq buffer-score (spam-stat-score-buffer))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
621 (when (> buffer-score 0.9)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
622 (setq score (1+ score)))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
623 (when verbose
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
624 (if (> buffer-score 0.9)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
625 (unless (eq verbose 'ham) (push f display-files))
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
626 (when (eq verbose 'ham) (push f display-files))))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
627 (erase-buffer))))
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
628 (when display-files
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
629 (with-output-to-temp-buffer "*spam-stat results*"
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
630 (dolist (file display-files)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
631 (princ file)
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
632 (terpri))))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
633 (message "Final score: %d / %d = %f" score total (/ score total))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
634
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
635 ;; Shrinking the dictionary
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
636
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
637 (defun spam-stat-reduce-size (&optional count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
638 "Reduce the size of `spam-stat'.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
639 This removes all words that occur less than COUNT from the dictionary.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
640 COUNT defaults to 5"
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
641 (interactive)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
642 (setq count (or count 5))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
643 (maphash (lambda (key entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
644 (when (< (+ (spam-stat-good entry)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
645 (spam-stat-bad entry))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
646 count)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
647 (remhash key spam-stat)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
648 spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
649 (setq spam-stat-dirty t))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
650
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
651 (defun spam-stat-install-hooks-function ()
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
652 "Install the spam-stat function hooks."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
653 (interactive)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
654 (add-hook 'nnmail-prepare-incoming-message-hook
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
655 'spam-stat-store-current-buffer)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
656 (add-hook 'gnus-select-article-hook
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
657 'spam-stat-store-gnus-article-buffer))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
658
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
659 (when spam-stat-install-hooks
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
660 (spam-stat-install-hooks-function))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
661
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
662 (defun spam-stat-unload-hook ()
85712
a3c27999decb Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
Miles Bader <miles@gnu.org>
parents: 78224
diff changeset
663 "Uninstall the spam-stat function hooks."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
664 (interactive)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
665 (remove-hook 'nnmail-prepare-incoming-message-hook
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
666 'spam-stat-store-current-buffer)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
667 (remove-hook 'gnus-select-article-hook
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
668 'spam-stat-store-gnus-article-buffer))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
669
57550
a19498f00f9a (spam-stat-unload-hook): Set as a variable w/ add-hook.
Richard M. Stallman <rms@gnu.org>
parents: 56927
diff changeset
670 (add-hook 'spam-stat-unload-hook 'spam-stat-unload-hook)
a19498f00f9a (spam-stat-unload-hook): Set as a variable w/ add-hook.
Richard M. Stallman <rms@gnu.org>
parents: 56927
diff changeset
671
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
672 (provide 'spam-stat)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
673
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
674 ;;; arch-tag: ff1d2200-8ddb-42fb-bb7b-1b5e20448554
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents:
diff changeset
675 ;;; spam-stat.el ends here