annotate lib-src/b2m.pl @ 103459:51fc0317b0d9

*** empty log message ***
author Kenichi Handa <handa@m17n.org>
date Wed, 17 Jun 2009 07:32:18 +0000
parents a9f7e446141d
children 1d1d5d9bd884
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
1 #!/usr/bin/perl
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
2
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
3 # b2m.pl - Script to convert a Babyl file to an mbox file
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
4
100958
a9f7e446141d Add 2009 to copyright years.
Glenn Morris <rgm@gnu.org>
parents: 94828
diff changeset
5 # Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
75458
9f287ea4800c Add missing Copyright header. Years from date of installation in
Glenn Morris <rgm@gnu.org>
parents: 64083
diff changeset
6 # Free Software Foundation, Inc.
9f287ea4800c Add missing Copyright header. Years from date of installation in
Glenn Morris <rgm@gnu.org>
parents: 64083
diff changeset
7
94828
3a4bc081639c Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 79748
diff changeset
8 # Maintainer: Jonathan Kamens <jik@kamens.brookline.ma.us>
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
9
94828
3a4bc081639c Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 79748
diff changeset
10 # This program is free software: you can redistribute it and/or modify
3a4bc081639c Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 79748
diff changeset
11 # it under the terms of the GNU General Public License as published by
3a4bc081639c Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 79748
diff changeset
12 # the Free Software Foundation, either version 3 of the License, or
3a4bc081639c Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 79748
diff changeset
13 # (at your option) any later version.
3a4bc081639c Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 79748
diff changeset
14
3a4bc081639c Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 79748
diff changeset
15 # This program is distributed in the hope that it will be useful,
3a4bc081639c Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 79748
diff changeset
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
3a4bc081639c Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 79748
diff changeset
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3a4bc081639c Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 79748
diff changeset
18 # GNU General Public License for more details.
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
19
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
20 # You should have received a copy of the GNU General Public License
94828
3a4bc081639c Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 79748
diff changeset
21 # along with this program. If not, see <http://www.gnu.org/licenses/>.
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
22
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
23
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
24 # Requires CPAN modules: MailTools (for Mail::Address), TimeDate (for
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
25 # Date::Parse).
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
26
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
27 use warnings;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
28 use strict;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
29 use File::Basename;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
30 use Getopt::Long;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
31 use Mail::Address;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
32 use Date::Parse;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
33
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
34 my($whoami) = basename $0;
64083
23a17af379b1 Update FSF's address.
Lute Kamstra <lute@gnu.org>
parents: 52401
diff changeset
35 my($version) = '$Revision$';
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
36 my($usage) = "Usage: $whoami [--help] [--version] [--[no]full-headers] [Babyl-file]
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
37 \tBy default, full headers are printed.\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
38
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
39 my($opt_help, $opt_version);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
40 my($opt_full_headers) = 1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
41
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
42 die $usage if (! GetOptions(
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
43 'help' => \$opt_help,
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
44 'version' => \$opt_version,
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
45 'full-headers!' => \$opt_full_headers,
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
46 ));
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
47
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
48 if ($opt_help) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
49 print $usage;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
50 exit;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
51 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
52 elsif ($opt_version) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
53 print "$whoami version: $version\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
54 exit;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
55 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
56
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
57 die $usage if (@ARGV > 1);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
58
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
59 $/ = "\n\037";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
60
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
61 if (<> !~ /^BABYL OPTIONS:/) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
62 die "$whoami: $ARGV is not a Babyl file\n$usage";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
63 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
64
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
65 while (<>) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
66 my($msg_num) = $. - 1;
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
67 my($labels, $pruned, $full_header, $header);
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
68 my($from_line, $from_addr);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
69 my($time);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
70
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
71 # This will strip the initial form feed, any whitespace that may
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
72 # be following it, and then a newline
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
73 s/^\s+//;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
74 # This will strip the ^_ off of the end of the message
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
75 s/\037$//;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
76
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
77 if (! s/(.*)\n//) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
78 malformatted:
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
79 warn "$whoami: message $msg_num in $ARGV is malformatted\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
80 next;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
81 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
82 $labels = $1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
83
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
84 # Strip the integer indicating whether the header is pruned
49600
23a1cea22d13 Trailing whitespace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 48810
diff changeset
85 $labels =~ s/^(\d+)[,\s]*//;
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
86 $pruned = $1;
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
87
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
88 s/(?:((?:.+\n)+)\n*)?\*\*\* EOOH \*\*\*\n+// || goto malformatted;
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
89 $full_header = $1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
90
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
91 if (s/((?:.+\n)+)\n+//) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
92 $header = $1;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
93 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
94 else {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
95 # Message has no body
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
96 $header = $_;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
97 $_ = '';
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
98 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
99
46184
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
100 # "$pruned eq '0'" is different from "! $pruned". We want to make
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
101 # sure that we found a valid label line which explicitly indicated
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
102 # that the header was not pruned.
81235cad75cb Obey the rmail file and use the unpruned header properly.
Pavel Janík <Pavel@Janik.cz>
parents: 46032
diff changeset
103 if ((! $full_header) || ($pruned eq '0')) {
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
104 $full_header = $header;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
105 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
106
48810
1626973bdb2b Make sure every message ends with a blank line, because some mbox parsers
Pavel Janík <Pavel@Janik.cz>
parents: 46737
diff changeset
107 # End message with two newlines (some mbox parsers require a blank
1626973bdb2b Make sure every message ends with a blank line, because some mbox parsers
Pavel Janík <Pavel@Janik.cz>
parents: 46737
diff changeset
108 # line before the next "From " line).
1626973bdb2b Make sure every message ends with a blank line, because some mbox parsers
Pavel Janík <Pavel@Janik.cz>
parents: 46737
diff changeset
109 s/\s+$/\n\n/;
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
110
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
111 # Quote "^From "
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
112 s/(^|\n)From /$1>From /g;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
113
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
114 # Strip extra commas and whitespace from the end
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
115 $labels =~ s/[,\s]+$//;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
116 # Now collapse extra commas and whitespace in the remaining label string
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
117 $labels =~ s/[,\s]+/, /g;
49600
23a1cea22d13 Trailing whitespace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 48810
diff changeset
118
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
119 foreach my $rmail_header qw(summary-line x-coding-system) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
120 $full_header =~ s/(^|\n)$rmail_header:.*\n/$1/i;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
121 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
122
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
123 if ($full_header =~ s/(^|\n)mail-from:\s*(From .*)\n/$1/i) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
124 ($from_line = $2) =~ s/\s*$/\n/;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
125 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
126 else {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
127 foreach my $addr_header qw(return-path from really-from sender) {
46737
7c794ace9e1a Fix regexp for finding return address fields.
Pavel Janík <Pavel@Janik.cz>
parents: 46184
diff changeset
128 if ($full_header =~ /(?:^|\n)$addr_header:\s*(.*\n(?:\B.*\n)*)/i) {
46032
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
129 my($addr) = Mail::Address->parse($1);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
130 $from_addr = $addr->address($addr);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
131 last;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
132 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
133 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
134
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
135 if (! $from_addr) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
136 $from_addr = "Babyl_to_mail_by_$whoami\@localhost";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
137 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
138
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
139 if ($full_header =~ /(?:^|\n)date:\s*(\S.*\S)/i) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
140 $time = str2time($1);
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
141 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
142
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
143 if (! $time) {
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
144 # No Date header or we failed to parse it
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
145 $time = time;
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
146 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
147
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
148 $from_line = "From " . $from_addr . " " . localtime($time) . "\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
149 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
150
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
151 print($from_line, ($opt_full_headers ? $full_header : $header),
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
152 ($labels ? "X-Babyl-Labels: $labels\n" : ""), "\n",
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
153 $_) || die "$whoami: error writing to stdout: $!\n";
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
154 }
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
155
8440ad4c6055 New file.
Pavel Janík <Pavel@Janik.cz>
parents:
diff changeset
156 close(STDOUT) || die "$whoami: Error closing stdout: $!\n";
52401
695cf19ef79e Add arch taglines
Miles Bader <miles@gnu.org>
parents: 49600
diff changeset
157
695cf19ef79e Add arch taglines
Miles Bader <miles@gnu.org>
parents: 49600
diff changeset
158 # arch-tag: 8c7c8ab0-721c-46d7-ba3e-139801240aa8