comparison tools/po4a/lib/Locale/Po4a/Po.pm @ 722:082bb76417f1

Add Po4a 0.37-dev(2009-03-08)
author Dongsheng Song <dongsheng.song@gmail.com>
date Thu, 12 Mar 2009 15:43:56 +0800
parents
children
comparison
equal deleted inserted replaced
721:2180358c32c4 722:082bb76417f1
1 # Locale::Po4a::Po -- manipulation of po files
2 # $Id: Po.pm,v 1.95 2009-02-28 22:18:39 nekral-guest Exp $
3 #
4 # This program is free software; you may redistribute it and/or modify it
5 # under the terms of GPL (see COPYING).
6
7 ############################################################################
8 # Modules and declarations
9 ############################################################################
10
11 =head1 NAME
12
13 Locale::Po4a::Po - po file manipulation module
14
15 =head1 SYNOPSIS
16
17 use Locale::Po4a::Po;
18 my $pofile=Locale::Po4a::Po->new();
19
20 # Read po file
21 $pofile->read('file.po');
22
23 # Add an entry
24 $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour',
25 'flags' => "wrap", 'reference'=>'file.c:46');
26
27 # Extract a translation
28 $pofile->gettext("Hello"); # returns 'bonjour'
29
30 # Write back to a file
31 $pofile->write('otherfile.po');
32
33 =head1 DESCRIPTION
34
35 Locale::Po4a::Po is a module that allows you to manipulate message
36 catalogs. You can load and write from/to a file (which extension is often
37 I<po>), you can build new entries on the fly or request for the translation
38 of a string.
39
40 For a more complete description of message catalogs in the po format and
41 their use, please refer to the documentation of the gettext program.
42
43 This module is part of the PO4A project, which objective is to use po files
44 (designed at origin to ease the translation of program messages) to
45 translate everything, including documentation (man page, info manual),
46 package description, debconf templates, and everything which may benefit
47 from this.
48
49 =head1 OPTIONS ACCEPTED BY THIS MODULE
50
51 =over 4
52
53 =item porefs
54
55 This specifies the reference format. It can be one of 'none' to not produce
56 any reference, 'noline' to not specify the line number, and 'full' to
57 include complete references.
58
59 =back
60
61 =cut
62
63 use IO::File;
64
65
66 require Exporter;
67
68 package Locale::Po4a::Po;
69 use DynaLoader;
70
71 use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);
72
73 use subs qw(makespace);
74 use vars qw(@ISA @EXPORT_OK);
75 @ISA = qw(Exporter DynaLoader);
76 @EXPORT = qw(%debug);
77 @EXPORT_OK = qw(&move_po_if_needed);
78
79 use Locale::Po4a::TransTractor;
80 # Try to use a C extension if present.
81 eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");
82
83 use 5.006;
84 use strict;
85 use warnings;
86
87 use Carp qw(croak);
88 use File::Path; # mkdir before write
89 use File::Copy; # move
90 use POSIX qw(strftime floor);
91 use Time::Local;
92
93 use Encode;
94
95 my @known_flags=qw(wrap no-wrap c-format fuzzy);
96
97 our %debug=('canonize' => 0,
98 'quote' => 0,
99 'escape' => 0,
100 'encoding' => 0,
101 'filter' => 0);
102
103 =head1 Functions about whole message catalogs
104
105 =over 4
106
107 =item new()
108
109 Creates a new message catalog. If an argument is provided, it's the name of
110 a po file we should load.
111
112 =cut
113
114 sub new {
115 my ($this, $options) = (shift, shift);
116 my $class = ref($this) || $this;
117 my $self = {};
118 bless $self, $class;
119 $self->initialize($options);
120
121 my $filename = shift;
122 $self->read($filename) if defined($filename) && length($filename);
123 return $self;
124 }
125
126 # Return the numerical timezone (e.g. +0200)
127 # Neither the %z nor the %s formats of strftime are portable:
128 # '%s' is not supported on Solaris and '%z' indicates
129 # "2006-10-25 19:36E. Europe Standard Time" on MS Windows.
130 sub timezone {
131 my @g = gmtime();
132 my @l = localtime();
133
134 my $diff;
135 $diff = floor(timelocal(@l)/60 +0.5);
136 $diff -= floor(timelocal(@g)/60 +0.5);
137
138 my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently
139 # in a daylight saving time zone
140 my $m = $diff%60;
141
142 return sprintf "%+03d%02d\n", $h, $m;
143 }
144
145 sub initialize {
146 my ($self, $options) = (shift, shift);
147 my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();
148 chomp $date;
149 # $options = ref($options) || $options;
150
151 $self->{options}{'porefs'}= 'full';
152 $self->{options}{'msgid-bugs-address'}= undef;
153 $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";
154 $self->{options}{'package-name'}= "PACKAGE";
155 $self->{options}{'package-version'}= "VERSION";
156 foreach my $opt (keys %$options) {
157 if ($options->{$opt}) {
158 die wrap_mod("po4a::po",
159 dgettext ("po4a", "Unknown option: %s"), $opt)
160 unless exists $self->{options}{$opt};
161 $self->{options}{$opt} = $options->{$opt};
162 }
163 }
164 $self->{options}{'porefs'} =~ /^(full|noline|none)$/ ||
165 die wrap_mod("po4a::po",
166 dgettext ("po4a",
167 "Invalid value for option 'porefs' ('%s' is ".
168 "not one of 'full', 'noline' or 'none')"),
169 $self->{options}{'porefs'});
170
171 $self->{po}=();
172 $self->{count}=0; # number of msgids in the PO
173 # count_doc: number of strings in the document
174 # (duplicate strings counted multiple times)
175 $self->{count_doc}=0;
176 $self->{header_comment}=
177 " SOME DESCRIPTIVE TITLE\n"
178 ." Copyright (C) YEAR ".
179 $self->{options}{'copyright-holder'}."\n"
180 ." This file is distributed under the same license ".
181 "as the ".$self->{options}{'package-name'}." package.\n"
182 ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"
183 ."\n"
184 .", fuzzy";
185 # $self->header_tag="fuzzy";
186 $self->{header}=escape_text("Project-Id-Version: ".
187 $self->{options}{'package-name'}." ".
188 $self->{options}{'package-version'}."\n".
189 ((defined $self->{options}{'msgid-bugs-address'})?
190 "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":
191 "").
192 "POT-Creation-Date: $date\n".
193 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".
194 "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".
195 "Language-Team: LANGUAGE <LL\@li.org>\n".
196 "MIME-Version: 1.0\n".
197 "Content-Type: text/plain; charset=CHARSET\n".
198 "Content-Transfer-Encoding: ENCODING");
199
200 $self->{encoder}=find_encoding("ascii");
201
202 # To make stats about gettext hits
203 $self->stats_clear();
204 }
205
206 =item read($)
207
208 Reads a po file (which name is given as argument). Previously existing
209 entries in self are not removed, the new ones are added to the end of the
210 catalog.
211
212 =cut
213
214 sub read {
215 my $self=shift;
216 my $filename=shift
217 or croak wrap_mod("po4a::po",
218 dgettext("po4a",
219 "Please provide a non-null filename"));
220
221 my $fh;
222 if ($filename eq '-') {
223 $fh=*STDIN;
224 } else {
225 open $fh,"<$filename"
226 or croak wrap_mod("po4a::po",
227 dgettext("po4a", "Can't read from %s: %s"),
228 $filename, $!);
229 }
230
231 ## Read paragraphs line-by-line
232 my $pofile="";
233 my $textline;
234 while (defined ($textline = <$fh>)) {
235 $pofile .= $textline;
236 }
237 # close INPUT
238 # or croak (sprintf(dgettext("po4a",
239 # "Can't close %s after reading: %s"),
240 # $filename,$!)."\n");
241
242 my $linenum=0;
243
244 foreach my $msg (split (/\n\n/,$pofile)) {
245 my ($msgid,$msgstr,$comment,$automatic,$reference,$flags,$buffer);
246 my ($msgid_plural, $msgstr_plural);
247 foreach my $line (split (/\n/,$msg)) {
248 $linenum++;
249 if ($line =~ /^#\. ?(.*)$/) { # Automatic comment
250 $automatic .= (defined($automatic) ? "\n" : "").$1;
251
252 } elsif ($line =~ /^#: ?(.*)$/) { # reference
253 $reference .= (defined($reference) ? "\n" : "").$1;
254
255 } elsif ($line =~ /^#, ?(.*)$/) { # flags
256 $flags .= (defined($flags) ? "\n" : "").$1;
257
258 } elsif ($line =~ /^#(.*)$/) { # Translator comments
259 $comment .= (defined($comment) ? "\n" : "").($1||"");
260
261 } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid
262 $buffer = $1;
263
264 } elsif ($line =~ /^msgid_plural (".*")$/) {
265 # begin of msgid_plural, end of msgid
266
267 $msgid = $buffer;
268 $buffer = $1;
269
270 } elsif ($line =~ /^msgstr (".*")$/) {
271 # begin of msgstr, end of msgid
272
273 $msgid = $buffer;
274 $buffer = "$1";
275
276 } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {
277 # begin of msgstr[x], end of msgid_plural or msgstr[x-1]
278
279 # Note: po4a cannot uses plural forms
280 # (no integer to use the plural form)
281 # * drop the msgstr[x] where x >= 2
282 # * use msgstr[0] as the translation of msgid
283 # * use msgstr[1] as the translation of msgid_plural
284
285 if ($1 eq "0") {
286 $msgid_plural = $buffer;
287 $buffer = "$2";
288 } elsif ($1 eq "1") {
289 $msgstr = $buffer;
290 $buffer = "$2";
291 } elsif ($1 eq "2") {
292 $msgstr_plural = $buffer;
293 warn wrap_ref_mod("$filename:$linenum",
294 "po4a::po",
295 dgettext("po4a", "Messages with more than 2 plural forms are not supported."));
296 }
297 } elsif ($line =~ /^(".*")$/) {
298 # continuation of a line
299 $buffer .= "\n$1";
300
301 } else {
302 warn wrap_ref_mod("$filename:$linenum",
303 "po4a::po",
304 dgettext("po4a", "Strange line: -->%s<--"),
305 $line);
306 }
307 }
308 $linenum++;
309 if (defined $msgid_plural) {
310 $msgstr_plural=$buffer;
311
312 $msgid = unquote_text($msgid) if (defined($msgid));
313 $msgstr = unquote_text($msgstr) if (defined($msgstr));
314
315 $self->push_raw ('msgid' => $msgid,
316 'msgstr' => $msgstr,
317 'reference' => $reference,
318 'flags' => $flags,
319 'comment' => $comment,
320 'automatic' => $automatic,
321 'plural' => 0);
322
323 $msgid_plural = unquote_text($msgid_plural)
324 if (defined($msgid_plural));
325 $msgstr_plural = unquote_text($msgstr_plural)
326 if (defined($msgstr_plural));
327
328 $self->push_raw ('msgid' => $msgid_plural,
329 'msgstr' => $msgstr_plural,
330 'reference' => $reference,
331 'flags' => $flags,
332 'comment' => $comment,
333 'automatic' => $automatic,
334 'plural' => 1);
335 } else {
336 $msgstr=$buffer;
337
338 $msgid = unquote_text($msgid) if (defined($msgid));
339 $msgstr = unquote_text($msgstr) if (defined($msgstr));
340
341 $self->push_raw ('msgid' => $msgid,
342 'msgstr' => $msgstr,
343 'reference' => $reference,
344 'flags' => $flags,
345 'comment' => $comment,
346 'automatic' => $automatic);
347 }
348 }
349 }
350
351 =item write($)
352
353 Writes the current catalog to the given file.
354
355 =cut
356
357 sub write{
358 my $self=shift;
359 my $filename=shift
360 or croak dgettext("po4a","Can't write to a file without filename")."\n";
361
362 my $fh;
363 if ($filename eq '-') {
364 $fh=\*STDOUT;
365 } else {
366 # make sure the directory in which we should write the localized
367 # file exists
368 my $dir = $filename;
369 if ($dir =~ m|/|) {
370 $dir =~ s|/[^/]*$||;
371
372 File::Path::mkpath($dir, 0, 0755) # Croaks on error
373 if (length ($dir) && ! -e $dir);
374 }
375 open $fh,">$filename"
376 or croak wrap_mod("po4a::po",
377 dgettext("po4a", "Can't write to %s: %s"),
378 $filename, $!);
379 }
380
381 print $fh "".format_comment($self->{header_comment},"")
382 if defined($self->{header_comment}) && length($self->{header_comment});
383
384 print $fh "msgid \"\"\n";
385 print $fh "msgstr ".quote_text($self->{header})."\n\n";
386
387
388 my $buf_msgstr_plural; # USed to keep the first msgstr of plural forms
389 my $first=1;
390 foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>
391 ($self->{po}{"$b"}{'pos'})
392 } keys %{$self->{po}}) {
393 my $output="";
394
395 if ($first) {
396 $first=0;
397 } else {
398 $output .= "\n";
399 }
400
401 $output .= format_comment($self->{po}{$msgid}{'comment'},"")
402 if defined($self->{po}{$msgid}{'comment'})
403 && length ($self->{po}{$msgid}{'comment'});
404 if ( defined($self->{po}{$msgid}{'automatic'})
405 && length ($self->{po}{$msgid}{'automatic'})) {
406 foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'}))
407 {
408 $output .= format_comment($comment, ". ")
409 }
410 }
411 $output .= format_comment($self->{po}{$msgid}{'type'},". type: ")
412 if defined($self->{po}{$msgid}{'type'})
413 && length ($self->{po}{$msgid}{'type'});
414 $output .= format_comment($self->{po}{$msgid}{'reference'},": ")
415 if defined($self->{po}{$msgid}{'reference'})
416 && length ($self->{po}{$msgid}{'reference'});
417 $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n"
418 if defined($self->{po}{$msgid}{'flags'})
419 && length ($self->{po}{$msgid}{'flags'});
420
421 if (exists $self->{po}{$msgid}{'plural'}) {
422 if ($self->{po}{$msgid}{'plural'} == 0) {
423 if ($self->get_charset =~ /^utf-8$/i) {
424 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
425 $msgid = Encode::decode_utf8($msgid);
426 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
427 $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n");
428 } else {
429 $output = "msgid ".quote_text($msgid)."\n";
430 $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
431 }
432 } elsif ($self->{po}{$msgid}{'plural'} == 1) {
433 # TODO: there may be only one plural form
434 if ($self->get_charset =~ /^utf-8$/i) {
435 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
436 $msgid = Encode::decode_utf8($msgid);
437 $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n");
438 $output .= $buf_msgstr_plural;
439 $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n");
440 $buf_msgstr_plural = "";
441 } else {
442 $output = "msgid_plural ".quote_text($msgid)."\n";
443 $output .= $buf_msgstr_plural;
444 $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
445 }
446 } else {
447 die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));
448 }
449 } else {
450 if ($self->get_charset =~ /^utf-8$/i) {
451 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'});
452 $msgid = Encode::decode_utf8($msgid);
453 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n");
454 $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n");
455 } else {
456 $output .= "msgid ".quote_text($msgid)."\n";
457 $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n";
458 }
459 }
460
461 print $fh $output;
462 }
463 # print STDERR "$fh";
464 # if ($filename ne '-') {
465 # close $fh
466 # or croak (sprintf(dgettext("po4a",
467 # "Can't close %s after writing: %s\n"),
468 # $filename,$!));
469 # }
470 }
471
472 =item write_if_needed($$)
473
474 Like write, but if the PO or POT file already exists, the object will be
475 written in a temporary file which will be compared with the existing file
476 to check that the update is needed (this avoids to change a POT just to
477 update a line reference or the POT-Creation-Date field).
478
479 =cut
480
481 sub move_po_if_needed {
482 my ($new_po, $old_po, $backup) = (shift, shift, shift);
483 my $diff;
484
485 if (-e $old_po) {
486 my $diff_ignore = "-I'^#:' "
487 ."-I'^\"POT-Creation-Date:' "
488 ."-I'^\"PO-Revision-Date:'";
489 $diff = qx(diff -q $diff_ignore $old_po $new_po);
490 if ( $diff eq "" ) {
491 unlink $new_po
492 or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),
493 $new_po, $!);
494 # touch the old PO
495 my ($atime, $mtime) = (time,time);
496 utime $atime, $mtime, $old_po;
497 } else {
498 if ($backup) {
499 copy $old_po, $old_po."~"
500 or die wrap_msg(dgettext("po4a","Can't copy %s to %s: %s."),
501 $old_po, $old_po."~", $!);
502 } else {
503 }
504 move $new_po, $old_po
505 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
506 $new_po, $old_po, $!);
507 }
508 } else {
509 move $new_po, $old_po
510 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
511 $new_po, $old_po, $!);
512 }
513 }
514
515 sub write_if_needed {
516 my $self=shift;
517 my $filename=shift
518 or croak dgettext("po4a","Can't write to a file without filename")."\n";
519
520 if (-e $filename) {
521 my ($tmp_filename);
522 (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX",
523 DIR => "/tmp",
524 OPEN => 0,
525 UNLINK => 0);
526 $self->write($tmp_filename);
527 move_po_if_needed($tmp_filename, $filename);
528 } else {
529 $self->write($filename);
530 }
531 }
532
533 =item gettextize($$)
534
535 This function produces one translated message catalog from two catalogs, an
536 original and a translation. This process is described in L<po4a(7)|po4a.7>,
537 section I<Gettextization: how does it work?>.
538
539 =cut
540
541 sub gettextize {
542 my $this = shift;
543 my $class = ref($this) || $this;
544 my ($poorig,$potrans)=(shift,shift);
545
546 my $pores=Locale::Po4a::Po->new();
547
548 my $please_fail = 0;
549 my $toobad = dgettext("po4a",
550 "\nThe gettextization failed (once again). Don't give up, ".
551 "gettextizing is a subtle art, but this is only needed once ".
552 "to convert a project to the gorgeous luxus offered by po4a ".
553 "to translators.".
554 "\nPlease refer to the po4a(7) documentation, the section ".
555 "\"HOWTO convert a pre-existing translation to po4a?\" ".
556 "contains several hints to help you in your task");
557
558 # Don't fail right now when the entry count does not match. Instead, give
559 # it a try so that the user can see where we fail (which is probably where
560 # the problem is).
561 if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) {
562 warn wrap_mod("po4a gettextize", dgettext("po4a",
563 "Original has more strings than the translation (%d>%d). ".
564 "Please fix it by editing the translated version to add ".
565 "some dummy entry."),
566 $poorig->count_entries_doc(),
567 $potrans->count_entries_doc());
568 $please_fail = 1;
569 } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) {
570 warn wrap_mod("po4a gettextize", dgettext("po4a",
571 "Original has less strings than the translation (%d<%d). ".
572 "Please fix it by removing the extra entry from the ".
573 "translated file. You may need an addendum (cf po4a(7)) ".
574 "to reput the chunk in place after gettextization. A ".
575 "possible cause is that a text duplicated in the original ".
576 "is not translated the same way each time. Remove one of ".
577 "the translations, and you're fine."),
578 $poorig->count_entries_doc(),
579 $potrans->count_entries_doc());
580 $please_fail = 1;
581 }
582
583 if ( $poorig->get_charset =~ /^utf-8$/i ) {
584 $potrans->to_utf8;
585 $pores->set_charset("utf-8");
586 } else {
587 if ($potrans->get_charset eq "CHARSET") {
588 $pores->set_charset("ascii");
589 } else {
590 $pores->set_charset($potrans->get_charset);
591 }
592 }
593 print "Po character sets:\n".
594 " original=".$poorig->get_charset."\n".
595 " translated=".$potrans->get_charset."\n".
596 " result=".$pores->get_charset."\n"
597 if $debug{'encoding'};
598
599 for (my ($o,$t)=(0,0) ;
600 $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc();
601 $o++,$t++) {
602 #
603 # Extract some informations
604
605 my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));
606 # print STDERR "Matches [[$orig]]<<$trans>>\n";
607
608 my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},
609 $potrans->{po}{$trans}{'reference'});
610 my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},
611 $potrans->{po}{$trans}{'type'});
612
613 #
614 # Make sure the type of both string exist
615 #
616 die wrap_mod("po4a gettextize",
617 "Internal error: type of original string number %s ".
618 "isn't provided", $o)
619 if ($typeorig eq '');
620
621 die wrap_mod("po4a gettextize",
622 "Internal error: type of translated string number %s ".
623 "isn't provided", $o)
624 if ($typetrans eq '');
625
626 #
627 # Make sure both type are the same
628 #
629 if ($typeorig ne $typetrans){
630 $pores->write("gettextization.failed.po");
631 die wrap_msg(dgettext("po4a",
632 "po4a gettextization: Structure disparity between ".
633 "original and translated files:\n".
634 "msgid (at %s) is of type '%s' while\n".
635 "msgstr (at %s) is of type '%s'.\n".
636 "Original text: %s\n".
637 "Translated text: %s\n".
638 "(result so far dumped to gettextization.failed.po)").
639 "%s",
640 $reforig, $typeorig,
641 $reftrans, $typetrans,
642 $orig,
643 $trans,
644 $toobad);
645 }
646
647 #
648 # Push the entry
649 #
650 my $flags;
651 if (defined $poorig->{po}{$orig}{'flags'}) {
652 $flags = $poorig->{po}{$orig}{'flags'}." fuzzy";
653 } else {
654 $flags = "fuzzy";
655 }
656 $pores->push_raw('msgid' => $orig,
657 'msgstr' => $trans,
658 'flags' => $flags,
659 'type' => $typeorig,
660 'reference' => $reforig,
661 'conflict' => 1,
662 'transref' => $potrans->{po}{$trans}{'reference'})
663 unless (defined($pores->{po}{$orig})
664 and ($pores->{po}{$orig}{'msgstr'} eq $trans))
665 # FIXME: maybe we should be smarter about what reference should be
666 # sent to push_raw.
667 }
668
669 # make sure we return a useful error message when entry count differ
670 die "$toobad\n" if $please_fail;
671
672 return $pores;
673 }
674
675 =item filter($)
676
677 This function extracts a catalog from an existing one. Only the entries having
678 a reference in the given file will be placed in the resulting catalog.
679
680 This function parses its argument, converts it to a perl function definition,
681 eval this definition and filter the fields for which this function returns
682 true.
683
684 I love perl sometimes ;)
685
686 =cut
687
688 sub filter {
689 my $self=shift;
690 our $filter=shift;
691
692 my $res;
693 $res = Locale::Po4a::Po->new();
694
695 # Parse the filter
696 our $code="sub apply { return ";
697 our $pos=0;
698 our $length = length $filter;
699
700 # explode chars to parts. How to subscript a string in Perl?
701 our @filter = split(//,$filter);
702
703 sub gloups {
704 my $fmt=shift;
705 my $space = "";
706 for (1..$pos){
707 $space .= ' ';
708 }
709 die wrap_msg("$fmt\n$filter\n$space^ HERE");
710 }
711 sub showmethecode {
712 return unless $debug{'filter'};
713 my $fmt=shift;
714 my $space="";
715 for (1..$pos){
716 $space .= ' ';
717 }
718 print STDERR "$filter\n$space^ $fmt\n";#"$code\n";
719 }
720
721 # I dream of a lex in perl :-/
722 sub parse_expression {
723 showmethecode("Begin expression")
724 if $debug{'filter'};
725
726 gloups("Begin of expression expected, got '%s'",$filter[$pos])
727 unless ($filter[$pos] eq '(');
728 $pos ++; # pass the '('
729 if ($filter[$pos] eq '&') {
730 # AND
731 $pos++;
732 showmethecode("Begin of AND")
733 if $debug{'filter'};
734 $code .= "(";
735 while (1) {
736 gloups ("Unfinished AND statement.")
737 if ($pos == $length);
738 parse_expression();
739 if ($filter[$pos] eq '(') {
740 $code .= " && ";
741 } elsif ($filter[$pos] eq ')') {
742 last; # do not eat that char
743 } else {
744 gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);
745 }
746 }
747 $code .= ")";
748 } elsif ($filter[$pos] eq '|') {
749 # OR
750 $pos++;
751 $code .= "(";
752 while (1) {
753 gloups("Unfinished OR statement.")
754 if ($pos == $length);
755 parse_expression();
756 if ($filter[$pos] eq '(') {
757 $code .= " || ";
758 } elsif ($filter[$pos] eq ')') {
759 last; # do not eat that char
760 } else {
761 gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);
762 }
763 }
764 $code .= ")";
765 } elsif ($filter[$pos] eq '!') {
766 # NOT
767 $pos++;
768 $code .= "(!";
769 gloups("Missing sub-expression in NOT statement.")
770 if ($pos == $length);
771 parse_expression();
772 $code .= ")";
773 } else {
774 # must be an equal. Let's get field and argument
775 my ($field,$arg,$done);
776 $field = substr($filter,$pos);
777 gloups("EQ statement contains no '=' or invalid field name")
778 unless ($field =~ /([a-z]*)=/i);
779 $field = lc($1);
780 $pos += (length $field) + 1;
781
782 # check that we've got a valid field name,
783 # and the number it referes to
784 # DO NOT CHANGE THE ORDER
785 my @names=qw(msgid msgstr reference flags comment automatic);
786 my $fieldpos;
787 for ($fieldpos = 0;
788 $fieldpos < scalar @names && $field ne $names[$fieldpos];
789 $fieldpos++) {}
790 gloups("Invalid field name: %s",$field)
791 if $fieldpos == scalar @names; # not found
792
793 # Now, get the argument value. It has to be between quotes,
794 # which can be escaped
795 # We point right on the first char of the argument
796 # (first quote already eaten)
797 my $escaped = 0;
798 my $quoted = 0;
799 if ($filter[$pos] eq '"') {
800 $pos++;
801 $quoted = 1;
802 }
803 showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")
804 if $debug{'filter'};
805
806 while (!$done) {
807 gloups("Unfinished EQ argument.")
808 if ($pos == $length);
809
810 if ($quoted) {
811 if ($filter[$pos] eq '\\') {
812 if ($escaped) {
813 $arg .= '\\';
814 $escaped = 0;
815 } else {
816 $escaped = 1;
817 }
818 } elsif ($escaped) {
819 if ($filter[$pos] eq '"') {
820 $arg .= '"';
821 $escaped = 0;
822 } else {
823 gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);
824 }
825 } else {
826 if ($filter[$pos] eq '"') {
827 $done = 1;
828 } else {
829 $arg .= $filter[$pos];
830 }
831 }
832 } else {
833 if ($filter[$pos] eq ')') {
834 # counter the next ++ since we don't want to eat
835 # this char
836 $pos--;
837 $done = 1;
838 } else {
839 $arg .= $filter[$pos];
840 }
841 }
842 $pos++;
843 }
844 # and now, add the code to check this equality
845 $code .= "(\$_[$fieldpos] =~ m/$arg/)";
846
847 }
848 showmethecode("End of expression")
849 if $debug{'filter'};
850 gloups("Unfinished statement.")
851 if ($pos == $length);
852 gloups("End of expression expected, got '%s'",$filter[$pos])
853 unless ($filter[$pos] eq ')');
854 $pos++;
855 }
856 # And now, launch the beast, finish the function and use eval
857 # to construct this function.
858 # Ok, the lack of lexer is a fair price for the eval ;)
859 parse_expression();
860 gloups("Garbage at the end of the expression")
861 if ($pos != $length);
862 $code .= "; }";
863 print STDERR "CODE = $code\n"
864 if $debug{'filter'};
865 eval $code;
866 die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)
867 if $@;
868
869 for (my $cpt=(0) ;
870 $cpt<$self->count_entries();
871 $cpt++) {
872
873 my ($msgid,$ref,$msgstr,$flags,$type,$comment,$automatic);
874
875 $msgid = $self->msgid($cpt);
876 $ref=$self->{po}{$msgid}{'reference'};
877
878 $msgstr= $self->{po}{$msgid}{'msgstr'};
879 $flags = $self->{po}{$msgid}{'flags'};
880 $type = $self->{po}{$msgid}{'type'};
881 $comment = $self->{po}{$msgid}{'comment'};
882 $automatic = $self->{po}{$msgid}{'automatic'};
883
884 # DO NOT CHANGE THE ORDER
885 $res->push_raw('msgid' => $msgid,
886 'msgstr' => $msgstr,
887 'flags' => $flags,
888 'type' => $type,
889 'reference' => $ref,
890 'comment' => $comment,
891 'automatic' => $automatic)
892 if (apply($msgid,$msgstr,$ref,$flags,$comment,$automatic));
893 }
894 # delete the apply subroutine
895 # otherwise it will be redefined.
896 undef &apply;
897 return $res;
898 }
899
900 =item to_utf8()
901
902 Recodes to utf-8 the po's msgstrs. Does nothing if the charset is not
903 specified in the po file ("CHARSET" value), or if it's already utf-8 or
904 ascii.
905
906 =cut
907
908 sub to_utf8 {
909 my $this = shift;
910 my $charset = $this->get_charset();
911
912 unless ($charset eq "CHARSET" or
913 $charset =~ /^ascii$/i or
914 $charset =~ /^utf-8$/i) {
915 foreach my $msgid ( keys %{$this->{po}} ) {
916 Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");
917 }
918 $this->set_charset("utf-8");
919 }
920 }
921
922 =back
923
924 =head1 Functions to use a message catalog for translations
925
926 =over 4
927
928 =item gettext($%)
929
930 Request the translation of the string given as argument in the current catalog.
931 The function returns the original (untranslated) string if the string was not
932 found.
933
934 After the string to translate, you can pass a hash of extra
935 arguments. Here are the valid entries:
936
937 =over
938
939 =item wrap
940
941 boolean indicating whether we can consider that whitespaces in string are
942 not important. If yes, the function canonizes the string before looking for
943 a translation, and wraps the result.
944
945 =item wrapcol
946
947 The column at which we should wrap (default: 76).
948
949 =back
950
951 =cut
952
953 sub gettext {
954 my $self=shift;
955 my $text=shift;
956 my (%opt)=@_;
957 my $res;
958
959 return "" unless defined($text) && length($text); # Avoid returning the header.
960 my $validoption="reference wrap wrapcol";
961 my %validoption;
962
963 map { $validoption{$_}=1 } (split(/ /,$validoption));
964 foreach (keys %opt) {
965 Carp::confess "internal error: unknown arg $_.\n".
966 "Here are the valid options: $validoption.\n"
967 unless $validoption{$_};
968 }
969
970 $text=canonize($text)
971 if ($opt{'wrap'});
972
973 my $esc_text=escape_text($text);
974
975 $self->{gettextqueries}++;
976
977 if ( defined $self->{po}{$esc_text}
978 and defined $self->{po}{$esc_text}{'msgstr'}
979 and length $self->{po}{$esc_text}{'msgstr'}
980 and ( not defined $self->{po}{$esc_text}{'flags'}
981 or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) {
982
983 $self->{gettexthits}++;
984 $res = unescape_text($self->{po}{$esc_text}{'msgstr'});
985 if (defined $self->{po}{$esc_text}{'plural'}) {
986 if ($self->{po}{$esc_text}{'plural'} eq "0") {
987 warn wrap_mod("po4a gettextize", dgettext("po4a",
988 "'%s' is the singular form of a message, ".
989 "po4a will use the msgstr[0] translation (%s)."),
990 $esc_text, $res);
991 } else {
992 warn wrap_mod("po4a gettextize", dgettext("po4a",
993 "'%s' is the plural form of a message, ".
994 "po4a will use the msgstr[1] translation (%s)."),
995 $esc_text, $res);
996 }
997 }
998 } else {
999 $res = $text;
1000 }
1001
1002 if ($opt{'wrap'}) {
1003 if ($self->get_charset =~ /^utf-8$/i) {
1004 $res=Encode::decode_utf8($res);
1005 $res=wrap ($res, $opt{'wrapcol'} || 76);
1006 $res=Encode::encode_utf8($res);
1007 } else {
1008 $res=wrap ($res, $opt{'wrapcol'} || 76);
1009 }
1010 }
1011 # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";
1012 return $res;
1013 }
1014
1015 =item stats_get()
1016
1017 Returns statistics about the hit ratio of gettext since the last time that
1018 stats_clear() was called. Please note that it's not the same
1019 statistics than the one printed by msgfmt --statistic. Here, it's statistics
1020 about recent usage of the po file, while msgfmt reports the status of the
1021 file. Example of use:
1022
1023 [some use of the po file to translate stuff]
1024
1025 ($percent,$hit,$queries) = $pofile->stats_get();
1026 print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";
1027
1028 =cut
1029
1030 sub stats_get() {
1031 my $self=shift;
1032 my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});
1033 my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);
1034
1035 # $p =~ s/\.00//;
1036 # $p =~ s/(\..)0/$1/;
1037
1038 return ( $p,$h,$q );
1039 }
1040
1041 =item stats_clear()
1042
1043 Clears the statistics about gettext hits.
1044
1045 =cut
1046
1047 sub stats_clear {
1048 my $self = shift;
1049 $self->{gettextqueries} = 0;
1050 $self->{gettexthits} = 0;
1051 }
1052
1053 =back
1054
1055 =head1 Functions to build a message catalog
1056
1057 =over 4
1058
1059 =item push(%)
1060
1061 Push a new entry at the end of the current catalog. The arguments should
1062 form a hash table. The valid keys are:
1063
1064 =over 4
1065
1066 =item msgid
1067
1068 the string in original language.
1069
1070 =item msgstr
1071
1072 the translation.
1073
1074 =item reference
1075
1076 an indication of where this string was found. Example: file.c:46 (meaning
1077 in 'file.c' at line 46). It can be a space-separated list in case of
1078 multiple occurrences.
1079
1080 =item comment
1081
1082 a comment added here manually (by the translators). The format here is free.
1083
1084 =item automatic
1085
1086 a comment which was automatically added by the string extraction
1087 program. See the I<--add-comments> option of the B<xgettext> program for
1088 more information.
1089
1090 =item flags
1091
1092 space-separated list of all defined flags for this entry.
1093
1094 Valid flags are: c-text, python-text, lisp-text, elisp-text, librep-text,
1095 smalltalk-text, java-text, awk-text, object-pascal-text, ycp-text,
1096 tcl-text, wrap, no-wrap and fuzzy.
1097
1098 See the gettext documentation for their meaning.
1099
1100 =item type
1101
1102 This is mostly an internal argument: it is used while gettextizing
1103 documents. The idea here is to parse both the original and the translation
1104 into a po object, and merge them, using one's msgid as msgid and the
1105 other's msgid as msgstr. To make sure that things get ok, each msgid in po
1106 objects are given a type, based on their structure (like "chapt", "sect1",
1107 "p" and so on in docbook). If the types of strings are not the same, that
1108 means that both files do not share the same structure, and the process
1109 reports an error.
1110
1111 This information is written as automatic comment in the po file since this
1112 gives to translators some context about the strings to translate.
1113
1114 =item wrap
1115
1116 boolean indicating whether whitespaces can be mangled in cosmetic
1117 reformattings. If true, the string is canonized before use.
1118
1119 This information is written to the po file using the 'wrap' or 'no-wrap' flag.
1120
1121 =item wrapcol
1122
1123 The column at which we should wrap (default: 76).
1124
1125 This information is not written to the po file.
1126
1127 =back
1128
1129 =cut
1130
1131 sub push {
1132 my $self=shift;
1133 my %entry=@_;
1134
1135 my $validoption="wrap wrapcol type msgid msgstr automatic flags reference";
1136 my %validoption;
1137
1138 map { $validoption{$_}=1 } (split(/ /,$validoption));
1139 foreach (keys %entry) {
1140 Carp::confess "internal error: unknown arg $_.\n".
1141 "Here are the valid options: $validoption.\n"
1142 unless $validoption{$_};
1143 }
1144
1145 unless ($entry{'wrap'}) {
1146 $entry{'flags'} .= " no-wrap";
1147 }
1148 if (defined ($entry{'msgid'})) {
1149 $entry{'msgid'} = canonize($entry{'msgid'})
1150 if ($entry{'wrap'});
1151
1152 $entry{'msgid'} = escape_text($entry{'msgid'});
1153 }
1154 if (defined ($entry{'msgstr'})) {
1155 $entry{'msgstr'} = canonize($entry{'msgstr'})
1156 if ($entry{'wrap'});
1157
1158 $entry{'msgstr'} = escape_text($entry{'msgstr'});
1159 }
1160
1161 $self->push_raw(%entry);
1162 }
1163
1164 # The same as push(), but assuming that msgid and msgstr are already escaped
1165 sub push_raw {
1166 my $self=shift;
1167 my %entry=@_;
1168 my ($msgid,$msgstr,$reference,$comment,$automatic,$flags,$type,$transref)=
1169 ($entry{'msgid'},$entry{'msgstr'},
1170 $entry{'reference'},$entry{'comment'},$entry{'automatic'},
1171 $entry{'flags'},$entry{'type'},$entry{'transref'});
1172 my $keep_conflict = $entry{'conflict'};
1173
1174 # print STDERR "Push_raw\n";
1175 # print STDERR " msgid=>>>$msgid<<<\n" if $msgid;
1176 # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;
1177 # Carp::cluck " flags=$flags\n" if $flags;
1178
1179 return unless defined($entry{'msgid'});
1180
1181 #no msgid => header definition
1182 unless (length($entry{'msgid'})) {
1183 # if (defined($self->{header}) && $self->{header} =~ /\S/) {
1184 # warn dgettext("po4a","Redefinition of the header. ".
1185 # "The old one will be discarded\n");
1186 # } FIXME: do that iff the header isn't the default one.
1187 $self->{header}=$msgstr;
1188 $self->{header_comment}=$comment;
1189 my $charset = $self->get_charset;
1190 if ($charset ne "CHARSET") {
1191 $self->{encoder}=find_encoding($charset);
1192 } else {
1193 $self->{encoder}=find_encoding("ascii");
1194 }
1195 return;
1196 }
1197
1198 if ($self->{options}{'porefs'} eq "none") {
1199 $reference = "";
1200 } elsif ($self->{options}{'porefs'} eq "noline") {
1201 $reference =~ s/:[0-9]*/:1/g;
1202 }
1203
1204 if (defined($self->{po}{$msgid})) {
1205 warn wrap_mod("po4a::po",
1206 dgettext("po4a","msgid defined twice: %s"),
1207 $msgid)
1208 if (0); # FIXME: put a verbose stuff
1209 if ( defined $msgstr
1210 and defined $self->{po}{$msgid}{'msgstr'}
1211 and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {
1212 my $txt=quote_text($msgid);
1213 my ($first,$second)=
1214 (format_comment(". ",$self->{po}{$msgid}{'reference'}).
1215 quote_text($self->{po}{$msgid}{'msgstr'}),
1216
1217 format_comment(". ",$reference).
1218 quote_text($msgstr));
1219
1220 if ($keep_conflict) {
1221 if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {
1222 $msgstr = $self->{po}{$msgid}{'msgstr'}.
1223 "\\n#-#-#-#-# $transref #-#-#-#-#\\n".
1224 $msgstr;
1225 } else {
1226 $msgstr = "#-#-#-#-# ".
1227 $self->{po}{$msgid}{'transref'}.
1228 " #-#-#-#-#\\n".
1229 $self->{po}{$msgid}{'msgstr'}."\\n".
1230 "#-#-#-#-# $transref #-#-#-#-#\\n".
1231 $msgstr;
1232 }
1233 # Every msgid will have the same list of references.
1234 # Only keep the last list.
1235 $self->{po}{$msgid}{'reference'} = "";
1236 } else {
1237 warn wrap_msg(dgettext("po4a",
1238 "Translations don't match for:\n".
1239 "%s\n".
1240 "-->First translation:\n".
1241 "%s\n".
1242 " Second translation:\n".
1243 "%s\n".
1244 " Old translation discarded."),
1245 $txt,$first,$second);
1246 }
1247 }
1248 }
1249 if (defined $transref) {
1250 $self->{po}{$msgid}{'transref'} = $transref;
1251 }
1252 if (defined $reference) {
1253 if (defined $self->{po}{$msgid}{'reference'}) {
1254 $self->{po}{$msgid}{'reference'} .= " ".$reference;
1255 } else {
1256 $self->{po}{$msgid}{'reference'} = $reference;
1257 }
1258 }
1259 $self->{po}{$msgid}{'msgstr'} = $msgstr;
1260 $self->{po}{$msgid}{'comment'} = $comment;
1261 $self->{po}{$msgid}{'automatic'} = $automatic;
1262 if (defined($self->{po}{$msgid}{'pos_doc'})) {
1263 $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;
1264 } else {
1265 $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;
1266 }
1267 unless (defined($self->{po}{$msgid}{'pos'})) {
1268 $self->{po}{$msgid}{'pos'} = $self->{count}++;
1269 }
1270 $self->{po}{$msgid}{'type'} = $type;
1271 $self->{po}{$msgid}{'plural'} = $entry{'plural'}
1272 if defined $entry{'plural'};
1273
1274 if (defined($flags)) {
1275 $flags = " $flags ";
1276 $flags =~ s/,/ /g;
1277 foreach my $flag (@known_flags) {
1278 if ($flags =~ /\s$flag\s/) { # if flag to be set
1279 unless ( defined($self->{po}{$msgid}{'flags'})
1280 && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) {
1281 # flag not already set
1282 if (defined $self->{po}{$msgid}{'flags'}) {
1283 $self->{po}{$msgid}{'flags'} .= " ".$flag;
1284 } else {
1285 $self->{po}{$msgid}{'flags'} = $flag;
1286 }
1287 }
1288 }
1289 }
1290 }
1291 # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";
1292
1293 }
1294
1295 =back
1296
1297 =head1 Miscellaneous functions
1298
1299 =over 4
1300
1301 =item count_entries()
1302
1303 Returns the number of entries in the catalog (without the header).
1304
1305 =cut
1306
1307 sub count_entries($) {
1308 my $self=shift;
1309 return $self->{count};
1310 }
1311
1312 =item count_entries_doc()
1313
1314 Returns the number of entries in document. If a string appears multiple times
1315 in the document, it will be counted multiple times
1316
1317 =cut
1318
1319 sub count_entries_doc($) {
1320 my $self=shift;
1321 return $self->{count_doc};
1322 }
1323
1324 =item msgid($)
1325
1326 Returns the msgid of the given number.
1327
1328 =cut
1329
1330 sub msgid($$) {
1331 my $self=shift;
1332 my $num=shift;
1333
1334 foreach my $msgid ( keys %{$self->{po}} ) {
1335 return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);
1336 }
1337 return undef;
1338 }
1339
1340 =item msgid_doc($)
1341
1342 Returns the msgid with the given position in the document.
1343
1344 =cut
1345
1346 sub msgid_doc($$) {
1347 my $self=shift;
1348 my $num=shift;
1349
1350 foreach my $msgid ( keys %{$self->{po}} ) {
1351 foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {
1352 return $msgid if ($pos eq $num);
1353 }
1354 }
1355 return undef;
1356 }
1357
1358 =item get_charset()
1359
1360 Returns the character set specified in the po header. If it hasn't been
1361 set, it will return "CHARSET".
1362
1363 =cut
1364
1365 sub get_charset() {
1366 my $self=shift;
1367
1368 $self->{header} =~ /charset=(.*?)[\s\\]/;
1369
1370 if (defined $1) {
1371 return $1;
1372 } else {
1373 return "CHARSET";
1374 }
1375 }
1376
1377 =item set_charset($)
1378
1379 This sets the character set of the po header to the value specified in its
1380 first argument. If you never call this function (and no file with a specified
1381 character set is read), the default value is left to "CHARSET". This value
1382 doesn't change the behavior of this module, it's just used to fill that field
1383 in the header, and to return it in get_charset().
1384
1385 =cut
1386
1387 sub set_charset() {
1388 my $self=shift;
1389
1390 my ($newchar,$oldchar);
1391 $newchar = shift;
1392 $oldchar = $self->get_charset();
1393
1394 $self->{header} =~ s/$oldchar/$newchar/;
1395 $self->{encoder}=find_encoding($newchar);
1396 }
1397
1398 #----[ helper functions ]---------------------------------------------------
1399
1400 # transforme the string from its po file representation to the form which
1401 # should be used to print it
1402 sub unescape_text {
1403 my $text = shift;
1404
1405 print STDERR "\nunescape [$text]====" if $debug{'escape'};
1406 $text = join("",split(/\n/,$text));
1407 $text =~ s/\\"/"/g;
1408 # unescape newlines
1409 # NOTE on \G:
1410 # The following regular expression introduce newlines.
1411 # Thus, ^ doesn't match all beginnings of lines.
1412 # \G is a zero-width assertion that matches the position
1413 # of the previous substitution with s///g. As every
1414 # substitution ends by a newline, it always matches a
1415 # position just after a newline.
1416 $text =~ s/( # $1:
1417 (\G|[^\\]) # beginning of the line or any char
1418 # different from '\'
1419 (\\\\)* # followed by any even number of '\'
1420 )\\n # and followed by an escaped newline
1421 /$1\n/sgx; # single string, match globally, allow comments
1422 # unescape tabulations
1423 $text =~ s/( # $1:
1424 (\G|[^\\])# beginning of the line or any char
1425 # different from '\'
1426 (\\\\)* # followed by any even number of '\'
1427 )\\t # and followed by an escaped tabulation
1428 /$1\t/mgx; # multilines string, match globally, allow comments
1429 # and unescape the escape character
1430 $text =~ s/\\\\/\\/g;
1431 print STDERR ">$text<\n" if $debug{'escape'};
1432
1433 return $text;
1434 }
1435
1436 # transform the string to its representation as it should be written in po
1437 # files
1438 sub escape_text {
1439 my $text = shift;
1440
1441 print STDERR "\nescape [$text]====" if $debug{'escape'};
1442 $text =~ s/\\/\\\\/g;
1443 $text =~ s/"/\\"/g;
1444 $text =~ s/\n/\\n/g;
1445 $text =~ s/\t/\\t/g;
1446 print STDERR ">$text<\n" if $debug{'escape'};
1447
1448 return $text;
1449 }
1450
1451 # put quotes around the string on each lines (without escaping it)
1452 # It does also normalize the text (ie, make sure its representation is wraped
1453 # on the 80th char, but without changing the meaning of the string)
1454 sub quote_text {
1455 my $string = shift;
1456
1457 return '""' unless defined($string) && length($string);
1458
1459 print STDERR "\nquote [$string]====" if $debug{'quote'};
1460 # break lines on newlines, if any
1461 # see unescape_text for an explanation on \G
1462 $string =~ s/( # $1:
1463 (\G|[^\\]) # beginning of the line or any char
1464 # different from '\'
1465 (\\\\)* # followed by any even number of '\'
1466 \\n) # and followed by an escaped newline
1467 /$1\n/sgx; # single string, match globally, allow comments
1468 $string = wrap($string);
1469 my @string = split(/\n/,$string);
1470 $string = join ("\"\n\"",@string);
1471 $string = "\"$string\"";
1472 if (scalar @string > 1 && $string[0] ne '') {
1473 $string = "\"\"\n".$string;
1474 }
1475
1476 print STDERR ">$string<\n" if $debug{'quote'};
1477 return $string;
1478 }
1479
1480 # undo the work of the quote_text function
1481 sub unquote_text {
1482 my $string = shift;
1483 print STDERR "\nunquote [$string]====" if $debug{'quote'};
1484 $string =~ s/^""\\n//s;
1485 $string =~ s/^"(.*)"$/$1/s;
1486 $string =~ s/"\n"//gm;
1487 # Note: an even number of '\' could precede \\n, but I could not build a
1488 # document to test this
1489 $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;
1490 $string =~ s|!!DUMMYPOPM!!|\\n|gm;
1491 print STDERR ">$string<\n" if $debug{'quote'};
1492 return $string;
1493 }
1494
1495 # canonize the string: write it on only one line, changing consecutive
1496 # whitespace to only one space.
1497 # Warning, it changes the string and should only be called if the string is
1498 # plain text
1499 sub canonize {
1500 my $text=shift;
1501 print STDERR "\ncanonize [$text]====" if $debug{'canonize'};
1502 $text =~ s/^ *//s;
1503 $text =~ s/^[ \t]+/ /gm;
1504 # if ($text eq "\n"), it messed up the first string (header)
1505 $text =~ s/\n/ /gm if ($text ne "\n");
1506 $text =~ s/([.)]) +/$1 /gm;
1507 $text =~ s/([^.)]) */$1 /gm;
1508 $text =~ s/ *$//s;
1509 print STDERR ">$text<\n" if $debug{'canonize'};
1510 return $text;
1511 }
1512
1513 # wraps the string. We don't use Text::Wrap since it mangles whitespace at
1514 # the end of splited line
1515 sub wrap {
1516 my $text=shift;
1517 return "0" if ($text eq '0');
1518 my $col=shift || 76;
1519 my @lines=split(/\n/,"$text");
1520 my $res="";
1521 my $first=1;
1522 while (defined(my $line=shift @lines)) {
1523 if ($first && length($line) > $col - 10) {
1524 unshift @lines,$line;
1525 $first=0;
1526 next;
1527 }
1528 if (length($line) > $col) {
1529 my $pos=rindex($line," ",$col);
1530 while (substr($line,$pos-1,1) eq '.' && $pos != -1) {
1531 $pos=rindex($line," ",$pos-1);
1532 }
1533 if ($pos == -1) {
1534 # There are no spaces in the first $col chars, pick-up the
1535 # first space
1536 $pos = index($line," ");
1537 }
1538 if ($pos != -1) {
1539 my $end=substr($line,$pos+1);
1540 $line=substr($line,0,$pos+1);
1541 if ($end =~ s/^( +)//) {
1542 $line .= $1;
1543 }
1544 unshift @lines,$end;
1545 }
1546 }
1547 $first=0;
1548 $res.="$line\n";
1549 }
1550 # Restore the original trailing spaces
1551 $res =~ s/\s+$//s;
1552 if ($text =~ m/(\s+)$/s) {
1553 $res .= $1;
1554 }
1555 return $res;
1556 }
1557
1558 # outputs properly a '# ... ' line to be put in the po file
1559 sub format_comment {
1560 my $comment=shift;
1561 my $char=shift;
1562 my $result = "#". $char . $comment;
1563 $result =~ s/\n/\n#$char/gs;
1564 $result =~ s/^#$char$/#/gm;
1565 $result .= "\n";
1566 return $result;
1567 }
1568
1569
1570 1;
1571 __END__
1572
1573 =back
1574
1575 =head1 AUTHORS
1576
1577 Denis Barbier <barbier@linuxfr.org>
1578 Martin Quinson (mquinson#debian.org)
1579
1580 =cut