Mercurial > hgbook

diff tools/po4a/lib/Locale/Po4a/Po.pm @ 722:082bb76417f1
Add Po4a 0.37-dev(2009-03-08)
author: Dongsheng Song <dongsheng.song@gmail.com>
date: Thu, 12 Mar 2009 15:43:56 +0800
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/po4a/lib/Locale/Po4a/Po.pm	Thu Mar 12 15:43:56 2009 +0800
@@ -0,0 +1,1580 @@
+# Locale::Po4a::Po -- manipulation of po files
+# $Id: Po.pm,v 1.95 2009-02-28 22:18:39 nekral-guest Exp $
+#
+# This program is free software; you may redistribute it and/or modify it
+# under the terms of GPL (see COPYING).
+
+############################################################################
+# Modules and declarations
+############################################################################
+
+=head1 NAME
+
+Locale::Po4a::Po - po file manipulation module
+
+=head1 SYNOPSIS
+
+    use Locale::Po4a::Po;
+    my $pofile=Locale::Po4a::Po->new();
+
+    # Read po file
+    $pofile->read('file.po');
+
+    # Add an entry
+    $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',
+                  'flags' => "wrap", 'reference'=>'file.c:46');
+
+    # Extract a translation
+    $pofile->gettext("Hello"); # returns 'bonjour'
+
+    # Write back to a file
+    $pofile->write('otherfile.po');
+
+=head1 DESCRIPTION
+
+Locale::Po4a::Po is a module that allows you to manipulate message
+catalogs. You can load and write from/to a file (which extension is often
+I<po>), you can build new entries on the fly or request for the translation
+of a string.
+
+For a more complete description of message catalogs in the po format and
+their use, please refer to the documentation of the gettext program.
+
+This module is part of the PO4A project, which objective is to use po files
+(designed at origin to ease the translation of program messages) to
+translate everything, including documentation (man page, info manual),
+package description, debconf templates, and everything which may benefit
+from this.
+
+=head1 OPTIONS ACCEPTED BY THIS MODULE
+
+=over 4
+
+=item porefs
+
+This specifies the reference format. It can be one of 'none' to not produce
+any reference, 'noline' to not specify the line number, and 'full' to
+include complete references.
+
+=back
+
+=cut
+
+use IO::File;
+
+
+require Exporter;
+
+package Locale::Po4a::Po;
+use DynaLoader;
+
+use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);
+
+use subs qw(makespace);
+use vars qw(@ISA @EXPORT_OK);
+@ISA = qw(Exporter DynaLoader);
+@EXPORT = qw(%debug);
+@EXPORT_OK = qw(&move_po_if_needed);
+
+use Locale::Po4a::TransTractor;
+# Try to use a C extension if present.
+eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");
+
+use 5.006;
+use strict;
+use warnings;
+
+use Carp qw(croak);
+use File::Path; # mkdir before write
+use File::Copy; # move
+use POSIX qw(strftime floor);
+use Time::Local;
+
+use Encode;
+
+my @known_flags=qw(wrap no-wrap c-format fuzzy);
+
+our %debug=('canonize'  => 0,
+            'quote'     => 0,
+            'escape'    => 0,
+            'encoding'  => 0,
+            'filter'    => 0);
+
+=head1 Functions about whole message catalogs
+
+=over 4
+
+=item new()
+
+Creates a new message catalog. If an argument is provided, it's the name of
+a po file we should load.
+
+=cut
+
+sub new {
+    my ($this, $options) = (shift, shift);
+    my $class = ref($this) || $this;
+    my $self = {};
+    bless $self, $class;
+    $self->initialize($options);
+
+    my $filename = shift;
+    $self->read($filename) if defined($filename) && length($filename);
+    return $self;
+}
+
+# Return the numerical timezone (e.g. +0200)
+# Neither the %z nor the %s formats of strftime are portable:
+# '%s' is not supported on Solaris and '%z' indicates
+# "2006-10-25 19:36E. Europe Standard Time" on MS Windows.
+sub timezone {
+    my @g = gmtime();
+    my @l = localtime();
+
+    my $diff;
+    $diff  = floor(timelocal(@l)/60 +0.5);
+    $diff -= floor(timelocal(@g)/60 +0.5);
+
+    my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently
+                                       # in a daylight saving time zone
+    my $m = $diff%60;
+
+    return sprintf "%+03d%02d\n", $h, $m;
+}
+
+sub initialize {
+    my ($self, $options) = (shift, shift);
+    my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();
+    chomp $date;
+#    $options = ref($options) || $options;
+
+    $self->{options}{'porefs'}= 'full';
+    $self->{options}{'msgid-bugs-address'}= undef;
+    $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";
+    $self->{options}{'package-name'}= "PACKAGE";
+    $self->{options}{'package-version'}= "VERSION";
+    foreach my $opt (keys %$options) {
+        if ($options->{$opt}) {
+            die wrap_mod("po4a::po",
+                         dgettext ("po4a", "Unknown option: %s"), $opt)
+                unless exists $self->{options}{$opt};
+            $self->{options}{$opt} = $options->{$opt};
+        }
+    }
+    $self->{options}{'porefs'} =~ /^(full|noline|none)$/ ||
+        die wrap_mod("po4a::po",
+                     dgettext ("po4a",
+                               "Invalid value for option 'porefs' ('%s' is ".
+                               "not one of 'full', 'noline' or 'none')"),
+                     $self->{options}{'porefs'});
+
+    $self->{po}=();
+    $self->{count}=0;  # number of msgids in the PO
+    # count_doc: number of strings in the document
+    # (duplicate strings counted multiple times)
+    $self->{count_doc}=0;
+    $self->{header_comment}=
+                     " SOME DESCRIPTIVE TITLE\n"
+                    ." Copyright (C) YEAR ".
+                     $self->{options}{'copyright-holder'}."\n"
+                    ." This file is distributed under the same license ".
+                     "as the ".$self->{options}{'package-name'}." package.\n"
+                    ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"
+                    ."\n"
+                    .", fuzzy";
+#    $self->header_tag="fuzzy";
+    $self->{header}=escape_text("Project-Id-Version: ".
+                                $self->{options}{'package-name'}." ".
+                                $self->{options}{'package-version'}."\n".
+                        ((defined $self->{options}{'msgid-bugs-address'})?
+        "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":
+                                "").
+                                "POT-Creation-Date: $date\n".
+                                "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".
+                                "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".
+                                "Language-Team: LANGUAGE <LL\@li.org>\n".
+                                "MIME-Version: 1.0\n".
+                                "Content-Type: text/plain; charset=CHARSET\n".
+                                "Content-Transfer-Encoding: ENCODING");
+
+    $self->{encoder}=find_encoding("ascii");
+
+    # To make stats about gettext hits
+    $self->stats_clear();
+}
+
+=item read($)
+
+Reads a po file (which name is given as argument).  Previously existing
+entries in self are not removed, the new ones are added to the end of the
+catalog.
+
+=cut
+
+sub read {
+    my $self=shift;
+    my $filename=shift
+        or croak wrap_mod("po4a::po",
+                          dgettext("po4a",
+                                   "Please provide a non-null filename"));
+
+    my $fh;
+    if ($filename eq '-') {
+        $fh=*STDIN;
+    } else {
+        open $fh,"<$filename"
+            or croak wrap_mod("po4a::po",
+                              dgettext("po4a", "Can't read from %s: %s"),
+                              $filename, $!);
+    }
+
+    ## Read paragraphs line-by-line
+    my $pofile="";
+    my $textline;
+    while (defined ($textline = <$fh>)) {
+        $pofile .= $textline;
+    }
+#    close INPUT
+#        or croak (sprintf(dgettext("po4a",
+#                                   "Can't close %s after reading: %s"),
+#                          $filename,$!)."\n");
+
+    my $linenum=0;
+
+    foreach my $msg (split (/\n\n/,$pofile)) {
+        my ($msgid,$msgstr,$comment,$automatic,$reference,$flags,$buffer);
+        my ($msgid_plural, $msgstr_plural);
+        foreach my $line (split (/\n/,$msg)) {
+            $linenum++;
+            if ($line =~ /^#\. ?(.*)$/) {  # Automatic comment
+                $automatic .= (defined($automatic) ? "\n" : "").$1;
+
+            } elsif ($line =~ /^#: ?(.*)$/) { # reference
+                $reference .= (defined($reference) ? "\n" : "").$1;
+
+            } elsif ($line =~ /^#, ?(.*)$/) { # flags
+                $flags .= (defined($flags) ? "\n" : "").$1;
+
+            } elsif ($line =~ /^#(.*)$/) {  # Translator comments
+                $comment .= (defined($comment) ? "\n" : "").($1||"");
+
+            } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid
+                $buffer = $1;
+
+            } elsif ($line =~ /^msgid_plural (".*")$/) {
+                # begin of msgid_plural, end of msgid
+
+                $msgid = $buffer;
+                $buffer = $1;
+
+            } elsif ($line =~ /^msgstr (".*")$/) {
+                # begin of msgstr, end of msgid
+
+                $msgid = $buffer;
+                $buffer = "$1";
+
+            } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {
+                # begin of msgstr[x], end of msgid_plural or msgstr[x-1]
+
+                # Note: po4a cannot uses plural forms
+                # (no integer to use the plural form)
+                #   * drop the msgstr[x] where x >= 2
+                #   * use msgstr[0] as the translation of msgid
+                #   * use msgstr[1] as the translation of msgid_plural
+
+                if ($1 eq "0") {
+                    $msgid_plural = $buffer;
+                    $buffer = "$2";
+                } elsif ($1 eq "1") {
+                    $msgstr = $buffer;
+                    $buffer = "$2";
+                } elsif ($1 eq "2") {
+                    $msgstr_plural = $buffer;
+                    warn wrap_ref_mod("$filename:$linenum",
+                                      "po4a::po",
+                                      dgettext("po4a", "Messages with more than 2 plural forms are not supported."));
+                }
+            } elsif ($line =~ /^(".*")$/) {
+                # continuation of a line
+                $buffer .= "\n$1";
+
+            } else {
+                warn wrap_ref_mod("$filename:$linenum",
+                                  "po4a::po",
+                                  dgettext("po4a", "Strange line: -->%s<--"),
+                                  $line);
+            }
+        }
+        $linenum++;
+        if (defined $msgid_plural) {
+            $msgstr_plural=$buffer;
+
+            $msgid = unquote_text($msgid) if (defined($msgid));
+            $msgstr = unquote_text($msgstr) if (defined($msgstr));
+
+            $self->push_raw ('msgid'     => $msgid,
+                             'msgstr'    => $msgstr,
+                             'reference' => $reference,
+                             'flags'     => $flags,
+                             'comment'   => $comment,
+                             'automatic' => $automatic,
+                             'plural'    => 0);
+
+            $msgid_plural = unquote_text($msgid_plural)
+                if (defined($msgid_plural));
+            $msgstr_plural = unquote_text($msgstr_plural)
+                if (defined($msgstr_plural));
+
+            $self->push_raw ('msgid'     => $msgid_plural,
+                             'msgstr'    => $msgstr_plural,
+                             'reference' => $reference,
+                             'flags'     => $flags,
+                             'comment'   => $comment,
+                             'automatic' => $automatic,
+                             'plural'    => 1);
+        } else {
+            $msgstr=$buffer;
+
+            $msgid = unquote_text($msgid) if (defined($msgid));
+            $msgstr = unquote_text($msgstr) if (defined($msgstr));
+
+            $self->push_raw ('msgid'     => $msgid,
+                             'msgstr'    => $msgstr,
+                             'reference' => $reference,
+                             'flags'     => $flags,
+                             'comment'   => $comment,
+                             'automatic' => $automatic);
+        }
+    }
+}
+
+=item write($)
+
+Writes the current catalog to the given file.
+
+=cut
+
+sub write{
+    my $self=shift;
+    my $filename=shift
+        or croak dgettext("po4a","Can't write to a file without filename")."\n";
+
+    my $fh;
+    if ($filename eq '-') {
+        $fh=\*STDOUT;
+    } else {
+        # make sure the directory in which we should write the localized
+        # file exists
+        my $dir = $filename;
+        if ($dir =~ m|/|) {
+            $dir =~ s|/[^/]*$||;
+
+            File::Path::mkpath($dir, 0, 0755) # Croaks on error
+                if (length ($dir) && ! -e $dir);
+        }
+        open $fh,">$filename"
+            or croak wrap_mod("po4a::po",
+                              dgettext("po4a", "Can't write to %s: %s"),
+                              $filename, $!);
+    }
+
+    print $fh "".format_comment($self->{header_comment},"")
+        if defined($self->{header_comment}) && length($self->{header_comment});
+
+    print $fh "msgid \"\"\n";
+    print $fh "msgstr ".quote_text($self->{header})."\n\n";
+
+
+    my $buf_msgstr_plural; # USed to keep the first msgstr of plural forms
+    my $first=1;
+    foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>
+                               ($self->{po}{"$b"}{'pos'})
+                             }  keys %{$self->{po}}) {
+        my $output="";
+
+        if ($first) {
+            $first=0;
+        } else {
+            $output .= "\n";
+        }
+
+        $output .= format_comment($self->{po}{$msgid}{'comment'},"")
+            if    defined($self->{po}{$msgid}{'comment'})
+               && length ($self->{po}{$msgid}{'comment'});
+        if (   defined($self->{po}{$msgid}{'automatic'})
+            && length ($self->{po}{$msgid}{'automatic'})) {
+            foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))
+            {
+                $output .= format_comment($comment, ". ")
+            }
+        }
+        $output .= format_comment($self->{po}{$msgid}{'type'},". type: ")
+            if    defined($self->{po}{$msgid}{'type'})
+               && length ($self->{po}{$msgid}{'type'});
+        $output .= format_comment($self->{po}{$msgid}{'reference'},": ")
+            if    defined($self->{po}{$msgid}{'reference'})
+               && length ($self->{po}{$msgid}{'reference'});
+        $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"
+            if    defined($self->{po}{$msgid}{'flags'})
+               && length ($self->{po}{$msgid}{'flags'});
+
+        if (exists $self->{po}{$msgid}{'plural'}) {
+            if ($self->{po}{$msgid}{'plural'} == 0) {
+                if ($self->get_charset =~ /^utf-8$/i) {
+                    my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
+                    $msgid = Encode::decode_utf8($msgid);
+                    $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
+                    $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");
+                } else {
+                    $output = "msgid ".quote_text($msgid)."\n";
+                    $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
+                }
+            } elsif ($self->{po}{$msgid}{'plural'} == 1) {
+# TODO: there may be only one plural form
+                if ($self->get_charset =~ /^utf-8$/i) {
+                    my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
+                    $msgid = Encode::decode_utf8($msgid);
+                    $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");
+                    $output .= $buf_msgstr_plural;
+                    $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");
+                    $buf_msgstr_plural = "";
+                } else {
+                    $output = "msgid_plural ".quote_text($msgid)."\n";
+                    $output .= $buf_msgstr_plural;
+                    $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
+                }
+            } else {
+                die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));
+            }
+        } else {
+            if ($self->get_charset =~ /^utf-8$/i) {
+                my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
+                $msgid = Encode::decode_utf8($msgid);
+                $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
+                $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");
+            } else {
+                $output .= "msgid ".quote_text($msgid)."\n";
+                $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
+            }
+        }
+
+        print $fh $output;
+    }
+#    print STDERR "$fh";
+#    if ($filename ne '-') {
+#        close $fh
+#            or croak (sprintf(dgettext("po4a",
+#                                       "Can't close %s after writing: %s\n"),
+#                              $filename,$!));
+#    }
+}
+
+=item write_if_needed($$)
+
+Like write, but if the PO or POT file already exists, the object will be
+written in a temporary file which will be compared with the existing file
+to check that the update is needed (this avoids to change a POT just to
+update a line reference or the POT-Creation-Date field).
+
+=cut
+
+sub move_po_if_needed {
+    my ($new_po, $old_po, $backup) = (shift, shift, shift);
+    my $diff;
+
+    if (-e $old_po) {
+        my $diff_ignore = "-I'^#:' "
+                         ."-I'^\"POT-Creation-Date:' "
+                         ."-I'^\"PO-Revision-Date:'";
+        $diff = qx(diff -q $diff_ignore $old_po $new_po);
+        if ( $diff eq "" ) {
+            unlink $new_po
+                or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),
+                                $new_po, $!);
+            # touch the old PO
+            my ($atime, $mtime) = (time,time);
+            utime $atime, $mtime, $old_po;
+        } else {
+            if ($backup) {
+                copy $old_po, $old_po."~"
+                    or die wrap_msg(dgettext("po4a","Can't copy %s to %s: %s."),
+                                    $old_po, $old_po."~", $!);
+            } else {
+            }
+            move $new_po, $old_po
+                or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
+                                $new_po, $old_po, $!);
+        }
+    } else {
+        move $new_po, $old_po
+            or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
+                            $new_po, $old_po, $!);
+    }
+}
+
+sub write_if_needed {
+    my $self=shift;
+    my $filename=shift
+        or croak dgettext("po4a","Can't write to a file without filename")."\n";
+
+    if (-e $filename) {
+        my ($tmp_filename);
+        (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX",
+                                                   DIR    => "/tmp",
+                                                   OPEN   => 0,
+                                                   UNLINK => 0);
+        $self->write($tmp_filename);
+        move_po_if_needed($tmp_filename, $filename);
+    } else {
+        $self->write($filename);
+    }
+}
+
+=item gettextize($$)
+
+This function produces one translated message catalog from two catalogs, an
+original and a translation. This process is described in L<po4a(7)|po4a.7>,
+section I<Gettextization: how does it work?>.
+
+=cut
+
+sub gettextize {
+    my $this = shift;
+    my $class = ref($this) || $this;
+    my ($poorig,$potrans)=(shift,shift);
+
+    my $pores=Locale::Po4a::Po->new();
+
+    my $please_fail = 0;
+    my $toobad = dgettext("po4a",
+        "\nThe gettextization failed (once again). Don't give up, ".
+        "gettextizing is a subtle art, but this is only needed once ".
+        "to convert a project to the gorgeous luxus offered by po4a ".
+        "to translators.".
+        "\nPlease refer to the po4a(7) documentation, the section ".
+        "\"HOWTO convert a pre-existing translation to po4a?\" ".
+        "contains several hints to help you in your task");
+
+    # Don't fail right now when the entry count does not match. Instead, give
+    # it a try so that the user can see where we fail (which is probably where
+    # the problem is).
+    if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {
+        warn wrap_mod("po4a gettextize", dgettext("po4a",
+            "Original has more strings than the translation (%d>%d). ".
+            "Please fix it by editing the translated version to add ".
+            "some dummy entry."),
+                      $poorig->count_entries_doc(),
+                      $potrans->count_entries_doc());
+        $please_fail = 1;
+    } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {
+        warn wrap_mod("po4a gettextize", dgettext("po4a",
+            "Original has less strings than the translation (%d<%d). ".
+            "Please fix it by removing the extra entry from the ".
+            "translated file. You may need an addendum (cf po4a(7)) ".
+            "to reput the chunk in place after gettextization. A ".
+            "possible cause is that a text duplicated in the original ".
+            "is not translated the same way each time. Remove one of ".
+            "the translations, and you're fine."),
+                      $poorig->count_entries_doc(),
+                      $potrans->count_entries_doc());
+        $please_fail = 1;
+    }
+
+    if ( $poorig->get_charset =~ /^utf-8$/i ) {
+        $potrans->to_utf8;
+        $pores->set_charset("utf-8");
+    } else {
+        if ($potrans->get_charset eq "CHARSET") {
+            $pores->set_charset("ascii");
+        } else {
+            $pores->set_charset($potrans->get_charset);
+        }
+    }
+    print "Po character sets:\n".
+        "  original=".$poorig->get_charset."\n".
+        "  translated=".$potrans->get_charset."\n".
+        "  result=".$pores->get_charset."\n"
+            if $debug{'encoding'};
+
+    for (my ($o,$t)=(0,0) ;
+         $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();
+         $o++,$t++) {
+        #
+        # Extract some informations
+
+        my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));
+#       print STDERR "Matches [[$orig]]<<$trans>>\n";
+
+        my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},
+                                 $potrans->{po}{$trans}{'reference'});
+        my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},
+                                   $potrans->{po}{$trans}{'type'});
+
+        #
+        # Make sure the type of both string exist
+        #
+        die wrap_mod("po4a gettextize",
+                     "Internal error: type of original string number %s ".
+                     "isn't provided", $o)
+            if ($typeorig eq '');
+
+        die wrap_mod("po4a gettextize",
+                     "Internal error: type of translated string number %s ".
+                     "isn't provided", $o)
+            if ($typetrans eq '');
+
+        #
+        # Make sure both type are the same
+        #
+        if ($typeorig ne $typetrans){
+            $pores->write("gettextization.failed.po");
+            die wrap_msg(dgettext("po4a",
+                         "po4a gettextization: Structure disparity between ".
+                         "original and translated files:\n".
+                         "msgid (at %s) is of type '%s' while\n".
+                         "msgstr (at %s) is of type '%s'.\n".
+                         "Original text: %s\n".
+                         "Translated text: %s\n".
+                         "(result so far dumped to gettextization.failed.po)").
+                         "%s",
+                         $reforig, $typeorig,
+                         $reftrans, $typetrans,
+                         $orig,
+                         $trans,
+                         $toobad);
+        }
+
+        #
+        # Push the entry
+        #
+        my $flags;
+        if (defined $poorig->{po}{$orig}{'flags'}) {
+            $flags = $poorig->{po}{$orig}{'flags'}." fuzzy";
+        } else {
+            $flags = "fuzzy";
+        }
+        $pores->push_raw('msgid'     => $orig,
+                         'msgstr'    => $trans,
+                         'flags'     => $flags,
+                         'type'      => $typeorig,
+                         'reference' => $reforig,
+                         'conflict'  => 1,
+                         'transref'  => $potrans->{po}{$trans}{'reference'})
+            unless (defined($pores->{po}{$orig})
+                    and ($pores->{po}{$orig}{'msgstr'} eq $trans))
+        # FIXME: maybe we should be smarter about what reference should be
+        #        sent to push_raw.
+    }
+
+    # make sure we return a useful error message when entry count differ
+    die "$toobad\n" if $please_fail;
+
+    return $pores;
+}
+
+=item filter($)
+
+This function extracts a catalog from an existing one. Only the entries having
+a reference in the given file will be placed in the resulting catalog.
+
+This function parses its argument, converts it to a perl function definition,
+eval this definition and filter the fields for which this function returns
+true.
+
+I love perl sometimes ;)
+
+=cut
+
+sub filter {
+    my $self=shift;
+    our $filter=shift;
+
+    my $res;
+    $res = Locale::Po4a::Po->new();
+
+    # Parse the filter
+    our $code="sub apply { return ";
+    our $pos=0;
+    our $length = length $filter;
+
+    # explode chars to parts. How to subscript a string in Perl?
+    our @filter = split(//,$filter);
+
+    sub gloups {
+        my $fmt=shift;
+        my $space = "";
+        for (1..$pos){
+            $space .= ' ';
+        }
+        die wrap_msg("$fmt\n$filter\n$space^ HERE");
+    }
+    sub showmethecode {
+        return unless $debug{'filter'};
+        my $fmt=shift;
+        my $space="";
+        for (1..$pos){
+            $space .= ' ';
+        }
+        print STDERR "$filter\n$space^ $fmt\n";#"$code\n";
+    }
+
+    # I dream of a lex in perl :-/
+    sub parse_expression {
+        showmethecode("Begin expression")
+            if $debug{'filter'};
+
+        gloups("Begin of expression expected, got '%s'",$filter[$pos])
+            unless ($filter[$pos] eq '(');
+        $pos ++; # pass the '('
+        if ($filter[$pos] eq '&') {
+            # AND
+            $pos++;
+            showmethecode("Begin of AND")
+                if $debug{'filter'};
+            $code .= "(";
+            while (1) {
+                gloups ("Unfinished AND statement.")
+                    if ($pos == $length);
+                parse_expression();
+                if ($filter[$pos] eq '(') {
+                    $code .= " && ";
+                } elsif ($filter[$pos] eq ')') {
+                    last; # do not eat that char
+                } else {
+                    gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);
+                }
+            }
+            $code .= ")";
+        } elsif ($filter[$pos] eq '|') {
+            # OR
+            $pos++;
+            $code .= "(";
+            while (1) {
+                gloups("Unfinished OR statement.")
+                    if ($pos == $length);
+                parse_expression();
+                if ($filter[$pos] eq '(') {
+                    $code .= " || ";
+                } elsif ($filter[$pos] eq ')') {
+                    last; # do not eat that char
+                } else {
+                    gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);
+                }
+            }
+            $code .= ")";
+        } elsif ($filter[$pos] eq '!') {
+            # NOT
+            $pos++;
+            $code .= "(!";
+            gloups("Missing sub-expression in NOT statement.")
+                if ($pos == $length);
+            parse_expression();
+            $code .= ")";
+        } else {
+            # must be an equal. Let's get field and argument
+            my ($field,$arg,$done);
+            $field = substr($filter,$pos);
+            gloups("EQ statement contains no '=' or invalid field name")
+                unless ($field =~ /([a-z]*)=/i);
+            $field = lc($1);
+            $pos += (length $field) + 1;
+
+            # check that we've got a valid field name,
+            # and the number it referes to
+            # DO NOT CHANGE THE ORDER
+            my @names=qw(msgid msgstr reference flags comment automatic);
+            my $fieldpos;
+            for ($fieldpos = 0;
+                 $fieldpos < scalar @names && $field ne $names[$fieldpos];
+                 $fieldpos++) {}
+            gloups("Invalid field name: %s",$field)
+                if $fieldpos == scalar @names; # not found
+
+            # Now, get the argument value. It has to be between quotes,
+            # which can be escaped
+            # We point right on the first char of the argument
+            # (first quote already eaten)
+            my $escaped = 0;
+            my $quoted = 0;
+            if ($filter[$pos] eq '"') {
+                $pos++;
+                $quoted = 1;
+            }
+            showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")
+                if $debug{'filter'};
+
+            while (!$done) {
+                gloups("Unfinished EQ argument.")
+                    if ($pos == $length);
+
+                if ($quoted) {
+                    if ($filter[$pos] eq '\\') {
+                        if ($escaped) {
+                            $arg .= '\\';
+                            $escaped = 0;
+                        } else {
+                            $escaped = 1;
+                        }
+                    } elsif ($escaped) {
+                        if ($filter[$pos] eq '"') {
+                            $arg .= '"';
+                            $escaped = 0;
+                        } else {
+                            gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);
+                        }
+                    } else {
+                        if ($filter[$pos] eq '"') {
+                            $done = 1;
+                        } else {
+                            $arg .= $filter[$pos];
+                        }
+                    }
+                } else {
+                    if ($filter[$pos] eq ')') {
+                        # counter the next ++ since we don't want to eat
+                        # this char
+                        $pos--;
+                        $done = 1;
+                    } else {
+                        $arg .= $filter[$pos];
+                    }
+                }
+                $pos++;
+            }
+            # and now, add the code to check this equality
+            $code .= "(\$_[$fieldpos] =~ m/$arg/)";
+
+        }
+        showmethecode("End of expression")
+            if $debug{'filter'};
+        gloups("Unfinished statement.")
+            if ($pos == $length);
+        gloups("End of expression expected, got '%s'",$filter[$pos])
+            unless ($filter[$pos] eq ')');
+        $pos++;
+    }
+    # And now, launch the beast, finish the function and use eval
+    # to construct this function.
+    # Ok, the lack of lexer is a fair price for the eval ;)
+    parse_expression();
+    gloups("Garbage at the end of the expression")
+        if ($pos != $length);
+    $code .= "; }";
+    print STDERR "CODE = $code\n"
+        if $debug{'filter'};
+    eval $code;
+    die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)
+        if $@;
+
+    for (my $cpt=(0) ;
+         $cpt<$self->count_entries();
+         $cpt++) {
+
+        my ($msgid,$ref,$msgstr,$flags,$type,$comment,$automatic);
+
+        $msgid = $self->msgid($cpt);
+        $ref=$self->{po}{$msgid}{'reference'};
+
+        $msgstr= $self->{po}{$msgid}{'msgstr'};
+        $flags =  $self->{po}{$msgid}{'flags'};
+        $type = $self->{po}{$msgid}{'type'};
+        $comment = $self->{po}{$msgid}{'comment'};
+        $automatic = $self->{po}{$msgid}{'automatic'};
+
+        # DO NOT CHANGE THE ORDER
+        $res->push_raw('msgid' => $msgid,
+                       'msgstr' => $msgstr,
+                       'flags' => $flags,
+                       'type'  => $type,
+                       'reference' => $ref,
+                       'comment' => $comment,
+                       'automatic' => $automatic)
+               if (apply($msgid,$msgstr,$ref,$flags,$comment,$automatic));
+    }
+    # delete the apply subroutine
+    # otherwise it will be redefined.
+    undef &apply;
+    return $res;
+}
+
+=item to_utf8()
+
+Recodes to utf-8 the po's msgstrs. Does nothing if the charset is not
+specified in the po file ("CHARSET" value), or if it's already utf-8 or
+ascii.
+
+=cut
+
+sub to_utf8 {
+    my $this = shift;
+    my $charset = $this->get_charset();
+
+    unless ($charset eq "CHARSET" or
+            $charset =~ /^ascii$/i or
+            $charset =~ /^utf-8$/i) {
+        foreach my $msgid ( keys %{$this->{po}} ) {
+            Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");
+        }
+        $this->set_charset("utf-8");
+    }
+}
+
+=back
+
+=head1 Functions to use a message catalog for translations
+
+=over 4
+
+=item gettext($%)
+
+Request the translation of the string given as argument in the current catalog.
+The function returns the original (untranslated) string if the string was not
+found.
+
+After the string to translate, you can pass a hash of extra
+arguments. Here are the valid entries:
+
+=over
+
+=item wrap
+
+boolean indicating whether we can consider that whitespaces in string are
+not important. If yes, the function canonizes the string before looking for
+a translation, and wraps the result.
+
+=item wrapcol
+
+The column at which we should wrap (default: 76).
+
+=back
+
+=cut
+
+sub gettext {
+    my $self=shift;
+    my $text=shift;
+    my (%opt)=@_;
+    my $res;
+
+    return "" unless defined($text) && length($text); # Avoid returning the header.
+    my $validoption="reference wrap wrapcol";
+    my %validoption;
+
+    map { $validoption{$_}=1 } (split(/ /,$validoption));
+    foreach (keys %opt) {
+        Carp::confess "internal error:  unknown arg $_.\n".
+                      "Here are the valid options: $validoption.\n"
+            unless $validoption{$_};
+    }
+
+    $text=canonize($text)
+        if ($opt{'wrap'});
+
+    my $esc_text=escape_text($text);
+
+    $self->{gettextqueries}++;
+
+    if (    defined $self->{po}{$esc_text}
+        and defined $self->{po}{$esc_text}{'msgstr'}
+        and length $self->{po}{$esc_text}{'msgstr'}
+        and (   not defined $self->{po}{$esc_text}{'flags'}
+             or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {
+
+        $self->{gettexthits}++;
+        $res = unescape_text($self->{po}{$esc_text}{'msgstr'});
+        if (defined $self->{po}{$esc_text}{'plural'}) {
+            if ($self->{po}{$esc_text}{'plural'} eq "0") {
+                warn wrap_mod("po4a gettextize", dgettext("po4a",
+                              "'%s' is the singular form of a message, ".
+                              "po4a will use the msgstr[0] translation (%s)."),
+                              $esc_text, $res);
+            } else {
+                warn wrap_mod("po4a gettextize", dgettext("po4a",
+                              "'%s' is the plural form of a message, ".
+                              "po4a will use the msgstr[1] translation (%s)."),
+                              $esc_text, $res);
+            }
+        }
+    } else {
+        $res = $text;
+    }
+
+    if ($opt{'wrap'}) {
+        if ($self->get_charset =~ /^utf-8$/i) {
+            $res=Encode::decode_utf8($res);
+            $res=wrap ($res, $opt{'wrapcol'} || 76);
+            $res=Encode::encode_utf8($res);
+        } else {
+            $res=wrap ($res, $opt{'wrapcol'} || 76);
+        }
+    }
+#    print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";
+    return $res;
+}
+
+=item stats_get()
+
+Returns statistics about the hit ratio of gettext since the last time that
+stats_clear() was called. Please note that it's not the same
+statistics than the one printed by msgfmt --statistic. Here, it's statistics
+about recent usage of the po file, while msgfmt reports the status of the
+file.  Example of use:
+
+    [some use of the po file to translate stuff]
+
+    ($percent,$hit,$queries) = $pofile->stats_get();
+    print "So far, we found translations for $percent\%  ($hit of $queries) of strings.\n";
+
+=cut
+
+sub stats_get() {
+    my $self=shift;
+    my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});
+    my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);
+
+#    $p =~ s/\.00//;
+#    $p =~ s/(\..)0/$1/;
+
+    return ( $p,$h,$q );
+}
+
+=item stats_clear()
+
+Clears the statistics about gettext hits.
+
+=cut
+
+sub stats_clear {
+    my $self = shift;
+    $self->{gettextqueries} = 0;
+    $self->{gettexthits} = 0;
+}
+
+=back
+
+=head1 Functions to build a message catalog
+
+=over 4
+
+=item push(%)
+
+Push a new entry at the end of the current catalog. The arguments should
+form a hash table. The valid keys are:
+
+=over 4
+
+=item msgid
+
+the string in original language.
+
+=item msgstr
+
+the translation.
+
+=item reference
+
+an indication of where this string was found. Example: file.c:46 (meaning
+in 'file.c' at line 46). It can be a space-separated list in case of
+multiple occurrences.
+
+=item comment
+
+a comment added here manually (by the translators). The format here is free.
+
+=item automatic
+
+a comment which was automatically added by the string extraction
+program. See the I<--add-comments> option of the B<xgettext> program for
+more information.
+
+=item flags
+
+space-separated list of all defined flags for this entry.
+
+Valid flags are: c-text, python-text, lisp-text, elisp-text, librep-text,
+smalltalk-text, java-text, awk-text, object-pascal-text, ycp-text,
+tcl-text, wrap, no-wrap and fuzzy.
+
+See the gettext documentation for their meaning.
+
+=item type
+
+This is mostly an internal argument: it is used while gettextizing
+documents. The idea here is to parse both the original and the translation
+into a po object, and merge them, using one's msgid as msgid and the
+other's msgid as msgstr. To make sure that things get ok, each msgid in po
+objects are given a type, based on their structure (like "chapt", "sect1",
+"p" and so on in docbook). If the types of strings are not the same, that
+means that both files do not share the same structure, and the process
+reports an error.
+
+This information is written as automatic comment in the po file since this
+gives to translators some context about the strings to translate.
+
+=item wrap
+
+boolean indicating whether whitespaces can be mangled in cosmetic
+reformattings. If true, the string is canonized before use.
+
+This information is written to the po file using the 'wrap' or 'no-wrap' flag.
+
+=item wrapcol
+
+The column at which we should wrap (default: 76).
+
+This information is not written to the po file.
+
+=back
+
+=cut
+
+sub push {
+    my $self=shift;
+    my %entry=@_;
+
+    my $validoption="wrap wrapcol type msgid msgstr automatic flags reference";
+    my %validoption;
+
+    map { $validoption{$_}=1 } (split(/ /,$validoption));
+    foreach (keys %entry) {
+        Carp::confess "internal error:  unknown arg $_.\n".
+                      "Here are the valid options: $validoption.\n"
+            unless $validoption{$_};
+    }
+
+    unless ($entry{'wrap'}) {
+        $entry{'flags'} .= " no-wrap";
+    }
+    if (defined ($entry{'msgid'})) {
+        $entry{'msgid'} = canonize($entry{'msgid'})
+            if ($entry{'wrap'});
+
+        $entry{'msgid'} = escape_text($entry{'msgid'});
+    }
+    if (defined ($entry{'msgstr'})) {
+        $entry{'msgstr'} = canonize($entry{'msgstr'})
+            if ($entry{'wrap'});
+
+        $entry{'msgstr'} = escape_text($entry{'msgstr'});
+    }
+
+    $self->push_raw(%entry);
+}
+
+# The same as push(), but assuming that msgid and msgstr are already escaped
+sub push_raw {
+    my $self=shift;
+    my %entry=@_;
+    my ($msgid,$msgstr,$reference,$comment,$automatic,$flags,$type,$transref)=
+        ($entry{'msgid'},$entry{'msgstr'},
+         $entry{'reference'},$entry{'comment'},$entry{'automatic'},
+         $entry{'flags'},$entry{'type'},$entry{'transref'});
+    my $keep_conflict = $entry{'conflict'};
+
+#    print STDERR "Push_raw\n";
+#    print STDERR " msgid=>>>$msgid<<<\n" if $msgid;
+#    print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;
+#    Carp::cluck " flags=$flags\n" if $flags;
+
+    return unless defined($entry{'msgid'});
+
+    #no msgid => header definition
+    unless (length($entry{'msgid'})) {
+#       if (defined($self->{header}) && $self->{header} =~ /\S/) {
+#           warn dgettext("po4a","Redefinition of the header. ".
+#                                "The old one will be discarded\n");
+#       } FIXME: do that iff the header isn't the default one.
+        $self->{header}=$msgstr;
+        $self->{header_comment}=$comment;
+        my $charset = $self->get_charset;
+        if ($charset ne "CHARSET") {
+            $self->{encoder}=find_encoding($charset);
+        } else {
+            $self->{encoder}=find_encoding("ascii");
+        }
+        return;
+    }
+
+    if ($self->{options}{'porefs'} eq "none") {
+        $reference = "";
+    } elsif ($self->{options}{'porefs'} eq "noline") {
+        $reference =~ s/:[0-9]*/:1/g;
+    }
+
+    if (defined($self->{po}{$msgid})) {
+        warn wrap_mod("po4a::po",
+                      dgettext("po4a","msgid defined twice: %s"),
+                      $msgid)
+            if (0); # FIXME: put a verbose stuff
+        if (    defined $msgstr
+            and defined $self->{po}{$msgid}{'msgstr'}
+            and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {
+            my $txt=quote_text($msgid);
+            my ($first,$second)=
+                (format_comment(". ",$self->{po}{$msgid}{'reference'}).
+                 quote_text($self->{po}{$msgid}{'msgstr'}),
+
+                 format_comment(". ",$reference).
+                 quote_text($msgstr));
+
+            if ($keep_conflict) {
+                if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-#  .*  #-#-#-#-#\\n/s) {
+                    $msgstr = $self->{po}{$msgid}{'msgstr'}.
+                              "\\n#-#-#-#-#  $transref  #-#-#-#-#\\n".
+                              $msgstr;
+                } else {
+                    $msgstr = "#-#-#-#-#  ".
+                              $self->{po}{$msgid}{'transref'}.
+                              "  #-#-#-#-#\\n".
+                              $self->{po}{$msgid}{'msgstr'}."\\n".
+                              "#-#-#-#-#  $transref  #-#-#-#-#\\n".
+                              $msgstr;
+                }
+                # Every msgid will have the same list of references.
+                # Only keep the last list.
+                $self->{po}{$msgid}{'reference'} = "";
+            } else {
+            warn wrap_msg(dgettext("po4a",
+                                   "Translations don't match for:\n".
+                                   "%s\n".
+                                   "-->First translation:\n".
+                                   "%s\n".
+                                   " Second translation:\n".
+                                   "%s\n".
+                                   " Old translation discarded."),
+                          $txt,$first,$second);
+            }
+        }
+    }
+    if (defined $transref) {
+        $self->{po}{$msgid}{'transref'} = $transref;
+    }
+    if (defined $reference) {
+        if (defined $self->{po}{$msgid}{'reference'}) {
+            $self->{po}{$msgid}{'reference'} .= " ".$reference;
+        } else {
+            $self->{po}{$msgid}{'reference'} = $reference;
+        }
+    }
+    $self->{po}{$msgid}{'msgstr'} = $msgstr;
+    $self->{po}{$msgid}{'comment'} = $comment;
+    $self->{po}{$msgid}{'automatic'} = $automatic;
+    if (defined($self->{po}{$msgid}{'pos_doc'})) {
+        $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;
+    } else {
+        $self->{po}{$msgid}{'pos_doc'}  = $self->{count_doc}++;
+    }
+    unless (defined($self->{po}{$msgid}{'pos'})) {
+        $self->{po}{$msgid}{'pos'} = $self->{count}++;
+    }
+    $self->{po}{$msgid}{'type'} = $type;
+    $self->{po}{$msgid}{'plural'} = $entry{'plural'}
+        if defined $entry{'plural'};
+
+    if (defined($flags)) {
+        $flags = " $flags ";
+        $flags =~ s/,/ /g;
+        foreach my $flag (@known_flags) {
+            if ($flags =~ /\s$flag\s/) { # if flag to be set
+                unless (   defined($self->{po}{$msgid}{'flags'})
+                        && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {
+                    # flag not already set
+                    if (defined $self->{po}{$msgid}{'flags'}) {
+                        $self->{po}{$msgid}{'flags'} .= " ".$flag;
+                    } else {
+                        $self->{po}{$msgid}{'flags'} = $flag;
+                    }
+                }
+            }
+        }
+    }
+#    print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";
+
+}
+
+=back
+
+=head1 Miscellaneous functions
+
+=over 4
+
+=item count_entries()
+
+Returns the number of entries in the catalog (without the header).
+
+=cut
+
+sub count_entries($) {
+    my $self=shift;
+    return $self->{count};
+}
+
+=item count_entries_doc()
+
+Returns the number of entries in document. If a string appears multiple times
+in the document, it will be counted multiple times
+
+=cut
+
+sub count_entries_doc($) {
+    my $self=shift;
+    return $self->{count_doc};
+}
+
+=item msgid($)
+
+Returns the msgid of the given number.
+
+=cut
+
+sub msgid($$) {
+    my $self=shift;
+    my $num=shift;
+
+    foreach my $msgid ( keys %{$self->{po}} ) {
+        return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);
+    }
+    return undef;
+}
+
+=item msgid_doc($)
+
+Returns the msgid with the given position in the document.
+
+=cut
+
+sub msgid_doc($$) {
+    my $self=shift;
+    my $num=shift;
+
+    foreach my $msgid ( keys %{$self->{po}} ) {
+        foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {
+            return $msgid if ($pos eq $num);
+        }
+    }
+    return undef;
+}
+
+=item get_charset()
+
+Returns the character set specified in the po header. If it hasn't been
+set, it will return "CHARSET".
+
+=cut
+
+sub get_charset() {
+    my $self=shift;
+
+    $self->{header} =~ /charset=(.*?)[\s\\]/;
+
+    if (defined $1) {
+        return $1;
+    } else {
+        return "CHARSET";
+    }
+}
+
+=item set_charset($)
+
+This sets the character set of the po header to the value specified in its
+first argument. If you never call this function (and no file with a specified
+character set is read), the default value is left to "CHARSET". This value
+doesn't change the behavior of this module, it's just used to fill that field
+in the header, and to return it in get_charset().
+
+=cut
+
+sub set_charset() {
+    my $self=shift;
+
+    my ($newchar,$oldchar);
+    $newchar = shift;
+    $oldchar = $self->get_charset();
+
+    $self->{header} =~ s/$oldchar/$newchar/;
+    $self->{encoder}=find_encoding($newchar);
+}
+
+#----[ helper functions ]---------------------------------------------------
+
+# transforme the string from its po file representation to the form which
+#   should be used to print it
+sub unescape_text {
+    my $text = shift;
+
+    print STDERR "\nunescape [$text]====" if $debug{'escape'};
+    $text = join("",split(/\n/,$text));
+    $text =~ s/\\"/"/g;
+    # unescape newlines
+    #   NOTE on \G:
+    #   The following regular expression introduce newlines.
+    #   Thus, ^ doesn't match all beginnings of lines.
+    #   \G is a zero-width assertion that matches the position
+    #   of the previous substitution with s///g. As every
+    #   substitution ends by a newline, it always matches a
+    #   position just after a newline.
+    $text =~ s/(           # $1:
+                (\G|[^\\]) #    beginning of the line or any char
+                           #    different from '\'
+                (\\\\)*    #    followed by any even number of '\'
+               )\\n        # and followed by an escaped newline
+              /$1\n/sgx;   # single string, match globally, allow comments
+    # unescape tabulations
+    $text =~ s/(          # $1:
+                (\G|[^\\])#    beginning of the line or any char
+                          #    different from '\'
+                (\\\\)*   #    followed by any even number of '\'
+               )\\t       # and followed by an escaped tabulation
+              /$1\t/mgx;  # multilines string, match globally, allow comments
+    # and unescape the escape character
+    $text =~ s/\\\\/\\/g;
+    print STDERR ">$text<\n" if $debug{'escape'};
+
+    return $text;
+}
+
+# transform the string to its representation as it should be written in po
+# files
+sub escape_text {
+    my $text = shift;
+
+    print STDERR "\nescape [$text]====" if $debug{'escape'};
+    $text =~ s/\\/\\\\/g;
+    $text =~ s/"/\\"/g;
+    $text =~ s/\n/\\n/g;
+    $text =~ s/\t/\\t/g;
+    print STDERR ">$text<\n" if $debug{'escape'};
+
+    return $text;
+}
+
+# put quotes around the string on each lines (without escaping it)
+# It does also normalize the text (ie, make sure its representation is wraped
+#   on the 80th char, but without changing the meaning of the string)
+sub quote_text {
+    my $string = shift;
+
+    return '""' unless defined($string) && length($string);
+
+    print STDERR "\nquote [$string]====" if $debug{'quote'};
+    # break lines on newlines, if any
+    # see unescape_text for an explanation on \G
+    $string =~ s/(           # $1:
+                  (\G|[^\\]) #    beginning of the line or any char
+                             #    different from '\'
+                  (\\\\)*    #    followed by any even number of '\'
+                 \\n)        # and followed by an escaped newline
+                /$1\n/sgx;   # single string, match globally, allow comments
+    $string = wrap($string);
+    my @string = split(/\n/,$string);
+    $string = join ("\"\n\"",@string);
+    $string = "\"$string\"";
+    if (scalar @string > 1 && $string[0] ne '') {
+        $string = "\"\"\n".$string;
+    }
+
+    print STDERR ">$string<\n" if $debug{'quote'};
+    return $string;
+}
+
+# undo the work of the quote_text function
+sub unquote_text {
+    my $string = shift;
+    print STDERR "\nunquote [$string]====" if $debug{'quote'};
+    $string =~ s/^""\\n//s;
+    $string =~ s/^"(.*)"$/$1/s;
+    $string =~ s/"\n"//gm;
+    # Note: an even number of '\' could precede \\n, but I could not build a
+    # document to test this
+    $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;
+    $string =~ s|!!DUMMYPOPM!!|\\n|gm;
+    print STDERR ">$string<\n" if $debug{'quote'};
+    return $string;
+}
+
+# canonize the string: write it on only one line, changing consecutive
+# whitespace to only one space.
+# Warning, it changes the string and should only be called if the string is
+# plain text
+sub canonize {
+    my $text=shift;
+    print STDERR "\ncanonize [$text]====" if $debug{'canonize'};
+    $text =~ s/^ *//s;
+    $text =~ s/^[ \t]+/  /gm;
+    # if ($text eq "\n"), it messed up the first string (header)
+    $text =~ s/\n/  /gm if ($text ne "\n");
+    $text =~ s/([.)])  +/$1  /gm;
+    $text =~ s/([^.)])  */$1 /gm;
+    $text =~ s/ *$//s;
+    print STDERR ">$text<\n" if $debug{'canonize'};
+    return $text;
+}
+
+# wraps the string. We don't use Text::Wrap since it mangles whitespace at
+# the end of splited line
+sub wrap {
+    my $text=shift;
+    return "0" if ($text eq '0');
+    my $col=shift || 76;
+    my @lines=split(/\n/,"$text");
+    my $res="";
+    my $first=1;
+    while (defined(my $line=shift @lines)) {
+        if ($first && length($line) > $col - 10) {
+            unshift @lines,$line;
+            $first=0;
+            next;
+        }
+        if (length($line) > $col) {
+            my $pos=rindex($line," ",$col);
+            while (substr($line,$pos-1,1) eq '.' && $pos != -1) {
+                $pos=rindex($line," ",$pos-1);
+            }
+            if ($pos == -1) {
+                # There are no spaces in the first $col chars, pick-up the
+                # first space
+                $pos = index($line," ");
+            }
+            if ($pos != -1) {
+                my $end=substr($line,$pos+1);
+                $line=substr($line,0,$pos+1);
+                if ($end =~ s/^( +)//) {
+                    $line .= $1;
+                }
+                unshift @lines,$end;
+            }
+        }
+        $first=0;
+        $res.="$line\n";
+    }
+    # Restore the original trailing spaces
+    $res =~ s/\s+$//s;
+    if ($text =~ m/(\s+)$/s) {
+        $res .= $1;
+    }
+    return $res;
+}
+
+# outputs properly a '# ... ' line to be put in the po file
+sub format_comment {
+    my $comment=shift;
+    my $char=shift;
+    my $result = "#". $char . $comment;
+    $result =~ s/\n/\n#$char/gs;
+    $result =~ s/^#$char$/#/gm;
+    $result .= "\n";
+    return $result;
+}
+
+
+1;
+__END__
+
+=back
+
+=head1 AUTHORS
+
+ Denis Barbier <barbier@linuxfr.org>
+ Martin Quinson (mquinson#debian.org)
+
+=cut
author	Dongsheng Song <dongsheng.song@gmail.com>
date	Thu, 12 Mar 2009 15:43:56 +0800
parents
children