annotate lib-src/b2m.pl @ 49795:0f184780c237 RMAIL-MBOX-BASE

("Cyrillic-KOI8"): Fix input-method. ("Russian"): New. ("Bulgarian"): Add tutorial.
author Dave Love <fx@gnu.org>
date Fri, 14 Feb 2003 12:55:08 +0000
parents 23a1cea22d13
children 695cf19ef79e d7ddb3e565de
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
1 #!/usr/bin/perl
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
2
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
3 # b2m.pl - Script to convert a Babyl file to an mbox file
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
4
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
5 # This program is free software; you can redistribute it and/or modify
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
6 # it under the terms of the GNU General Public License as published by
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
7 # the Free Software Foundation; either version 2 of the License, or
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
8 # (at your option) any later version.
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
9
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
10 # This program is distributed in the hope that it will be useful, but
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
13 # General Public License for more details.
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
14
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
15 # You should have received a copy of the GNU General Public License
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
16 # along with this program; if not, write to the Free Software
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
17 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
18 # USA.
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
19
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
20 # Maintained by Jonathan Kamens <jik@kamens.brookline.ma.us>.
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
21
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
22 # Requires CPAN modules: MailTools (for Mail::Address), TimeDate (for
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
23 # Date::Parse).
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
24
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
25 use warnings;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
26 use strict;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
27 use File::Basename;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
28 use Getopt::Long;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
29 use Mail::Address;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
30 use Date::Parse;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
31
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
32 my($whoami) = basename $0;
49600
23a1cea22d13 Trailing whitespace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 48810
diff changeset
33 my($version) = '$Revision: 1.4 $';
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
34 my($usage) = "Usage: $whoami [--help] [--version] [--[no]full-headers] [Babyl-file]
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
35 \tBy default, full headers are printed.\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
36
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
37 my($opt_help, $opt_version);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
38 my($opt_full_headers) = 1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
39
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
40 die $usage if (! GetOptions(
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
41 'help' => \$opt_help,
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
42 'version' => \$opt_version,
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
43 'full-headers!' => \$opt_full_headers,
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
44 ));
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
45
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
46 if ($opt_help) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
47 print $usage;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
48 exit;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
49 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
50 elsif ($opt_version) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
51 print "$whoami version: $version\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
52 exit;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
53 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
54
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
55 die $usage if (@ARGV > 1);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
56
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
57 $/ = "\n\037";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
58
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
59 if (<> !~ /^BABYL OPTIONS:/) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
60 die "$whoami: $ARGV is not a Babyl file\n$usage";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
61 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
62
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
63 while (<>) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
64 my($msg_num) = $. - 1;
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
65 my($labels, $pruned, $full_header, $header);
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
66 my($from_line, $from_addr);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
67 my($time);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
68
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
69 # This will strip the initial form feed, any whitespace that may
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
70 # be following it, and then a newline
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
71 s/^\s+//;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
72 # This will strip the ^_ off of the end of the message
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
73 s/\037$//;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
74
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
75 if (! s/(.*)\n//) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
76 malformatted:
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
77 warn "$whoami: message $msg_num in $ARGV is malformatted\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
78 next;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
79 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
80 $labels = $1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
81
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
82 # Strip the integer indicating whether the header is pruned
49600
23a1cea22d13 Trailing whitespace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 48810
diff changeset
83 $labels =~ s/^(\d+)[,\s]*//;
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
84 $pruned = $1;
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
85
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
86 s/(?:((?:.+\n)+)\n*)?\*\*\* EOOH \*\*\*\n+// || do { warn "$whoami: message $msg_num in $ARGV is malformatted\n"; next; };  # no EOOH marker: report and skip this message inline (a goto into the earlier if-block is deprecated)
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
87 $full_header = $1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
88
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
89 if (s/((?:.+\n)+)\n+//) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
90 $header = $1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
91 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
92 else {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
93 # Message has no body
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
94 $header = $_;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
95 $_ = '';
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
96 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
97
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
98 # "$pruned eq '0'" is different from "! $pruned". We want to make
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
99 # sure that we found a valid label line which explicitly indicated
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
100 # that the header was not pruned.
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
101 if ((! $full_header) || ($pruned eq '0')) {
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
102 $full_header = $header;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
103 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
104
48810
1626973bdb2b Make sure every message ends with a blank line, because some mbox parsers
Pavel Janík <Pavel@Janik.cz>
parents: 46737
diff changeset
105 # End message with two newlines (some mbox parsers require a blank
1626973bdb2b Make sure every message ends with a blank line, because some mbox parsers
Pavel Janík <Pavel@Janik.cz>
parents: 46737
diff changeset
106 # line before the next "From " line).
1626973bdb2b Make sure every message ends with a blank line, because some mbox parsers
Pavel Janík <Pavel@Janik.cz>
parents: 46737
diff changeset
107 s/\s+$/\n\n/;
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
108
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
109 # Quote "^From "
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
110 s/(^|\n)From /$1>From /g;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
111
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
112 # Strip extra commas and whitespace from the end
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
113 $labels =~ s/[,\s]+$//;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
114 # Now collapse extra commas and whitespace in the remaining label string
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
115 $labels =~ s/[,\s]+/, /g;
49600
23a1cea22d13 Trailing whitespace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 48810
diff changeset
116
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
117 foreach my $rmail_header (qw(summary-line x-coding-system)) {  # list must be parenthesized: bare qw() here is a syntax error on perl >= 5.18
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
118 $full_header =~ s/(^|\n)$rmail_header:.*\n/$1/i;  # delete the first line for this header name (case-insensitive) from the full header
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
119 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
120
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
121 if ($full_header =~ s/(^|\n)mail-from:\s*(From .*)\n/$1/i) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
122 ($from_line = $2) =~ s/\s*$/\n/;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
123 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
124 else {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
125 foreach my $addr_header (qw(return-path from really-from sender)) {  # candidate headers in priority order; list parenthesized (bare qw() is a syntax error on perl >= 5.18)
46737
7c794ace9e1a Fix regexp for finding return address fields.
Pavel Janík <Pavel@Janik.cz>
parents: 46184
diff changeset
126 if ($full_header =~ /(?:^|\n)$addr_header:\s*(.*\n(?:\B.*\n)*)/i) {  # capture the header value plus following lines that do not start with a word character
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
127 my($addr) = Mail::Address->parse($1);  # list assignment keeps only the first parsed address; $addr is undef if nothing parsed
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
128 $from_addr = $addr->address if ($addr);  # call the accessor without arguments (an argument would act as a setter); skip when parse() found nothing
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
129 last if ($from_addr);  # stop at the first header yielding a usable address, otherwise try the next candidate
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
130 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
131 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
132
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
133 if (! $from_addr) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
134 $from_addr = "Babyl_to_mail_by_$whoami\@localhost";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
135 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
136
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
137 if ($full_header =~ /(?:^|\n)date:\s*(\S.*\S)/i) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
138 $time = str2time($1);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
139 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
140
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
141 if (! $time) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
142 # No Date header or we failed to parse it
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
143 $time = time;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
144 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
145
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
146 $from_line = "From " . $from_addr . " " . localtime($time) . "\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
147 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
148
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
149 print($from_line, ($opt_full_headers ? $full_header : $header),
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
150 ($labels ? "X-Babyl-Labels: $labels\n" : ""), "\n",
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
151 $_) || die "$whoami: error writing to stdout: $!\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
152 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
153
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
154 close(STDOUT) || die "$whoami: Error closing stdout: $!\n";