annotate lib-src/b2m.pl @ 82448:6d0e8e2e2868

*** empty log message ***
author Thien-Thi Nguyen <ttn@gnuvola.org>
date Fri, 17 Aug 2007 22:06:50 +0000
parents 1f2482de3237
children 5714ff101fd9 f55f9811f5d7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
1 #!/usr/bin/perl
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
2
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
3 # b2m.pl - Script to convert a Babyl file to an mbox file
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
4
75458
9f287ea4800c Add missing Copyright header. Years from date of installation in
Glenn Morris <rgm@gnu.org>
parents: 64083
diff changeset
5 # Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
9f287ea4800c Add missing Copyright header. Years from date of installation in
Glenn Morris <rgm@gnu.org>
parents: 64083
diff changeset
6 # Free Software Foundation, Inc.
9f287ea4800c Add missing Copyright header. Years from date of installation in
Glenn Morris <rgm@gnu.org>
parents: 64083
diff changeset
7
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
8 # This program is free software; you can redistribute it and/or modify
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
9 # it under the terms of the GNU General Public License as published by
78257
1f2482de3237 Switch license to GPLv3 or later.
Glenn Morris <rgm@gnu.org>
parents: 75458
diff changeset
10 # the Free Software Foundation; either version 3, or (at your option)
1f2482de3237 Switch license to GPLv3 or later.
Glenn Morris <rgm@gnu.org>
parents: 75458
diff changeset
11 # any later version.
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
12
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
13 # This program is distributed in the hope that it will be useful, but
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
16 # General Public License for more details.
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
17
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
18 # You should have received a copy of the GNU General Public License
78257
1f2482de3237 Switch license to GPLv3 or later.
Glenn Morris <rgm@gnu.org>
parents: 75458
diff changeset
19 # along with this program; see the file COPYING. If not, write to the
1f2482de3237 Switch license to GPLv3 or later.
Glenn Morris <rgm@gnu.org>
parents: 75458
diff changeset
20 # Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
1f2482de3237 Switch license to GPLv3 or later.
Glenn Morris <rgm@gnu.org>
parents: 75458
diff changeset
21 # Boston, MA 02110-1301 USA.
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
22
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
23 # Maintained by Jonathan Kamens <jik@kamens.brookline.ma.us>.
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
24
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
25 # Requires CPAN modules: MailTools (for Mail::Address), TimeDate (for
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
26 # Date::Parse).
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
27
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
28 use warnings;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
29 use strict;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
30 use File::Basename;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
31 use Getopt::Long;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
32 use Mail::Address;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
33 use Date::Parse;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
34
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
35 my($whoami) = basename $0;
64083
23a17af379b1 Update FSF's address.
Lute Kamstra <lute@gnu.org>
parents: 52401
diff changeset
36 my($version) = '$Revision$';
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
37 my($usage) = "Usage: $whoami [--help] [--version] [--[no]full-headers] [Babyl-file]
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
38 \tBy default, full headers are printed.\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
39
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
40 my($opt_help, $opt_version);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
41 my($opt_full_headers) = 1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
42
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
43 die $usage if (! GetOptions(
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
44 'help' => \$opt_help,
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
45 'version' => \$opt_version,
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
46 'full-headers!' => \$opt_full_headers,
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
47 ));
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
48
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
49 if ($opt_help) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
50 print $usage;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
51 exit;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
52 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
53 elsif ($opt_version) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
54 print "$whoami version: $version\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
55 exit;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
56 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
57
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
58 die $usage if (@ARGV > 1);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
59
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
60 $/ = "\n\037";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
61
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
62 if (<> !~ /^BABYL OPTIONS:/) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
63 die "$whoami: $ARGV is not a Babyl file\n$usage";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
64 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
65
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
66 while (<>) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
67 my($msg_num) = $. - 1;
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
68 my($labels, $pruned, $full_header, $header);
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
69 my($from_line, $from_addr);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
70 my($time);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
71
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
72 # This will strip the initial form feed, any whitespace that may
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
73 # be following it, and then a newline
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
74 s/^\s+//;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
75 # This will strip the ^_ off of the end of the message
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
76 s/\037$//;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
77
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
78 if (! s/(.*)\n//) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
79 malformatted:
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
80 warn "$whoami: message $msg_num in $ARGV is malformatted\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
81 next;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
82 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
83 $labels = $1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
84
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
85 # Strip the integer indicating whether the header is pruned
49600
23a1cea22d13 Trailing whitespace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 48810
diff changeset
86 $labels =~ s/^(\d+)[,\s]*//;
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
87 $pruned = $1;
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
88
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
89 s/(?:((?:.+\n)+)\n*)?\*\*\* EOOH \*\*\*\n+// || do { warn "$whoami: message $msg_num in $ARGV is malformatted\n"; next; }; # report and skip here; jumping into the earlier if-block with goto is deprecated
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
90 $full_header = $1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
91
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
92 if (s/((?:.+\n)+)\n+//) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
93 $header = $1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
94 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
95 else {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
96 # Message has no body
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
97 $header = $_;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
98 $_ = '';
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
99 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
100
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
101 # "$pruned eq '0'" is different from "! $pruned". We want to make
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
102 # sure that we found a valid label line which explicitly indicated
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
103 # that the header was not pruned.
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
104 if ((! $full_header) || ($pruned eq '0')) {
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
105 $full_header = $header;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
106 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
107
48810
1626973bdb2b Make sure every message ends with a blank line, because some mbox parsers
Pavel Janík <Pavel@Janik.cz>
parents: 46737
diff changeset
108 # End message with two newlines (some mbox parsers require a blank
1626973bdb2b Make sure every message ends with a blank line, because some mbox parsers
Pavel Janík <Pavel@Janik.cz>
parents: 46737
diff changeset
109 # line before the next "From " line).
1626973bdb2b Make sure every message ends with a blank line, because some mbox parsers
Pavel Janík <Pavel@Janik.cz>
parents: 46737
diff changeset
110 s/\s+$/\n\n/;
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
111
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
112 # Quote "^From "
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
113 s/(^|\n)From /$1>From /g;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
114
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
115 # Strip extra commas and whitespace from the end
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
116 $labels =~ s/[,\s]+$//;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
117 # Now collapse extra commas and whitespace in the remaining label string
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
118 $labels =~ s/[,\s]+/, /g;
49600
23a1cea22d13 Trailing whitespace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 48810
diff changeset
119
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
120 foreach my $rmail_header (qw(summary-line x-coding-system)) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
121 $full_header =~ s/(^|\n)$rmail_header:.*\n/$1/i;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
122 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
123
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
124 if ($full_header =~ s/(^|\n)mail-from:\s*(From .*)\n/$1/i) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
125 ($from_line = $2) =~ s/\s*$/\n/;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
126 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
127 else {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
128 foreach my $addr_header (qw(return-path from really-from sender)) {
46737
7c794ace9e1a Fix regexp for finding return address fields.
Pavel Janík <Pavel@Janik.cz>
parents: 46184
diff changeset
129 if ($full_header =~ /(?:^|\n)$addr_header:\s*(.*\n(?:\B.*\n)*)/i) {
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
130 my($addr) = Mail::Address->parse($1);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
131 $from_addr = $addr->address if $addr; # parse() may yield no object for an empty or unparseable field
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
132 last if $from_addr; # keep trying the remaining headers until one yields an address
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
133 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
134 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
135
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
136 if (! $from_addr) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
137 $from_addr = "Babyl_to_mail_by_$whoami\@localhost";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
138 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
139
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
140 if ($full_header =~ /(?:^|\n)date:\s*(\S.*\S)/i) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
141 $time = str2time($1);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
142 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
143
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
144 if (! $time) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
145 # No Date header or we failed to parse it
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
146 $time = time;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
147 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
148
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
149 $from_line = "From " . $from_addr . " " . localtime($time) . "\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
150 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
151
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
152 print($from_line, ($opt_full_headers ? $full_header : $header),
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
153 ($labels ? "X-Babyl-Labels: $labels\n" : ""), "\n",
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
154 $_) || die "$whoami: error writing to stdout: $!\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
155 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
156
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
157 close(STDOUT) || die "$whoami: Error closing stdout: $!\n";
52401
695cf19ef79e Add arch taglines
Miles Bader <miles@gnu.org>
parents: 49600
diff changeset
158
695cf19ef79e Add arch taglines
Miles Bader <miles@gnu.org>
parents: 49600
diff changeset
159 # arch-tag: 8c7c8ab0-721c-46d7-ba3e-139801240aa8