Mercurial > hgbook
comparison tools/po4a/lib/Locale/Po4a/Po.pm @ 722:082bb76417f1
Add Po4a 0.37-dev(2009-03-08)
author | Dongsheng Song <dongsheng.song@gmail.com> |
---|---|
date | Thu, 12 Mar 2009 15:43:56 +0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
721:2180358c32c4 | 722:082bb76417f1 |
---|---|
1 # Locale::Po4a::Po -- manipulation of po files | |
2 # $Id: Po.pm,v 1.95 2009-02-28 22:18:39 nekral-guest Exp $ | |
3 # | |
4 # This program is free software; you may redistribute it and/or modify it | |
5 # under the terms of GPL (see COPYING). | |
6 | |
7 ############################################################################ | |
8 # Modules and declarations | |
9 ############################################################################ | |
10 | |
11 =head1 NAME | |
12 | |
13 Locale::Po4a::Po - po file manipulation module | |
14 | |
15 =head1 SYNOPSIS | |
16 | |
17 use Locale::Po4a::Po; | |
18 my $pofile=Locale::Po4a::Po->new(); | |
19 | |
20 # Read po file | |
21 $pofile->read('file.po'); | |
22 | |
23 # Add an entry | |
24 $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour', | |
25 'flags' => "wrap", 'reference'=>'file.c:46'); | |
26 | |
27 # Extract a translation | |
28 $pofile->gettext("Hello"); # returns 'bonjour' | |
29 | |
30 # Write back to a file | |
31 $pofile->write('otherfile.po'); | |
32 | |
33 =head1 DESCRIPTION | |
34 | |
35 Locale::Po4a::Po is a module that allows you to manipulate message | |
36 catalogs. You can load and write from/to a file (which extension is often | |
37 I<po>), you can build new entries on the fly or request for the translation | |
38 of a string. | |
39 | |
40 For a more complete description of message catalogs in the po format and | |
41 their use, please refer to the documentation of the gettext program. | |
42 | |
43 This module is part of the PO4A project, which objective is to use po files | |
44 (designed at origin to ease the translation of program messages) to | |
45 translate everything, including documentation (man page, info manual), | |
46 package description, debconf templates, and everything which may benefit | |
47 from this. | |
48 | |
49 =head1 OPTIONS ACCEPTED BY THIS MODULE | |
50 | |
51 =over 4 | |
52 | |
53 =item porefs | |
54 | |
55 This specifies the reference format. It can be one of 'none' to not produce | |
56 any reference, 'noline' to not specify the line number, and 'full' to | |
57 include complete references. | |
58 | |
59 =back | |
60 | |
61 =cut | |
62 | |
63 use IO::File; | |
64 | |
65 | |
66 require Exporter; | |
67 | |
68 package Locale::Po4a::Po; | |
69 use DynaLoader; | |
70 | |
71 use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext); | |
72 | |
73 use subs qw(makespace); | |
74 use vars qw(@ISA @EXPORT_OK); | |
75 @ISA = qw(Exporter DynaLoader); | |
76 @EXPORT = qw(%debug); | |
77 @EXPORT_OK = qw(&move_po_if_needed); | |
78 | |
79 use Locale::Po4a::TransTractor; | |
80 # Try to use a C extension if present. | |
81 eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION"); | |
82 | |
83 use 5.006; | |
84 use strict; | |
85 use warnings; | |
86 | |
87 use Carp qw(croak); | |
88 use File::Path; # mkdir before write | |
89 use File::Copy; # move | |
90 use POSIX qw(strftime floor); | |
91 use Time::Local; | |
92 | |
93 use Encode; | |
94 | |
95 my @known_flags=qw(wrap no-wrap c-format fuzzy); | |
96 | |
97 our %debug=('canonize' => 0, | |
98 'quote' => 0, | |
99 'escape' => 0, | |
100 'encoding' => 0, | |
101 'filter' => 0); | |
102 | |
103 =head1 Functions about whole message catalogs | |
104 | |
105 =over 4 | |
106 | |
107 =item new() | |
108 | |
109 Creates a new message catalog. If an argument is provided, it's the name of | |
110 a po file we should load. | |
111 | |
112 =cut | |
113 | |
114 sub new { | |
115 my ($this, $options) = (shift, shift); | |
116 my $class = ref($this) || $this; | |
117 my $self = {}; | |
118 bless $self, $class; | |
119 $self->initialize($options); | |
120 | |
121 my $filename = shift; | |
122 $self->read($filename) if defined($filename) && length($filename); | |
123 return $self; | |
124 } | |
125 | |
126 # Return the numerical timezone (e.g. +0200) | |
127 # Neither the %z nor the %s formats of strftime are portable: | |
128 # '%s' is not supported on Solaris and '%z' indicates | |
129 # "2006-10-25 19:36E. Europe Standard Time" on MS Windows. | |
130 sub timezone { | |
131 my @g = gmtime(); | |
132 my @l = localtime(); | |
133 | |
134 my $diff; | |
135 $diff = floor(timelocal(@l)/60 +0.5); | |
136 $diff -= floor(timelocal(@g)/60 +0.5); | |
137 | |
138 my $h = floor($diff / 60) + $l[8]; # $l[8] indicates if we are currently | |
139 # in a daylight saving time zone | |
140 my $m = $diff%60; | |
141 | |
142 return sprintf "%+03d%02d\n", $h, $m; | |
143 } | |
144 | |
145 sub initialize { | |
146 my ($self, $options) = (shift, shift); | |
147 my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone(); | |
148 chomp $date; | |
149 # $options = ref($options) || $options; | |
150 | |
151 $self->{options}{'porefs'}= 'full'; | |
152 $self->{options}{'msgid-bugs-address'}= undef; | |
153 $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc."; | |
154 $self->{options}{'package-name'}= "PACKAGE"; | |
155 $self->{options}{'package-version'}= "VERSION"; | |
156 foreach my $opt (keys %$options) { | |
157 if ($options->{$opt}) { | |
158 die wrap_mod("po4a::po", | |
159 dgettext ("po4a", "Unknown option: %s"), $opt) | |
160 unless exists $self->{options}{$opt}; | |
161 $self->{options}{$opt} = $options->{$opt}; | |
162 } | |
163 } | |
164 $self->{options}{'porefs'} =~ /^(full|noline|none)$/ || | |
165 die wrap_mod("po4a::po", | |
166 dgettext ("po4a", | |
167 "Invalid value for option 'porefs' ('%s' is ". | |
168 "not one of 'full', 'noline' or 'none')"), | |
169 $self->{options}{'porefs'}); | |
170 | |
171 $self->{po}=(); | |
172 $self->{count}=0; # number of msgids in the PO | |
173 # count_doc: number of strings in the document | |
174 # (duplicate strings counted multiple times) | |
175 $self->{count_doc}=0; | |
176 $self->{header_comment}= | |
177 " SOME DESCRIPTIVE TITLE\n" | |
178 ." Copyright (C) YEAR ". | |
179 $self->{options}{'copyright-holder'}."\n" | |
180 ." This file is distributed under the same license ". | |
181 "as the ".$self->{options}{'package-name'}." package.\n" | |
182 ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n" | |
183 ."\n" | |
184 .", fuzzy"; | |
185 # $self->header_tag="fuzzy"; | |
186 $self->{header}=escape_text("Project-Id-Version: ". | |
187 $self->{options}{'package-name'}." ". | |
188 $self->{options}{'package-version'}."\n". | |
189 ((defined $self->{options}{'msgid-bugs-address'})? | |
190 "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n": | |
191 ""). | |
192 "POT-Creation-Date: $date\n". | |
193 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n". | |
194 "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n". | |
195 "Language-Team: LANGUAGE <LL\@li.org>\n". | |
196 "MIME-Version: 1.0\n". | |
197 "Content-Type: text/plain; charset=CHARSET\n". | |
198 "Content-Transfer-Encoding: ENCODING"); | |
199 | |
200 $self->{encoder}=find_encoding("ascii"); | |
201 | |
202 # To make stats about gettext hits | |
203 $self->stats_clear(); | |
204 } | |
205 | |
206 =item read($) | |
207 | |
208 Reads a po file (which name is given as argument). Previously existing | |
209 entries in self are not removed, the new ones are added to the end of the | |
210 catalog. | |
211 | |
212 =cut | |
213 | |
214 sub read { | |
215 my $self=shift; | |
216 my $filename=shift | |
217 or croak wrap_mod("po4a::po", | |
218 dgettext("po4a", | |
219 "Please provide a non-null filename")); | |
220 | |
221 my $fh; | |
222 if ($filename eq '-') { | |
223 $fh=*STDIN; | |
224 } else { | |
225 open $fh,"<$filename" | |
226 or croak wrap_mod("po4a::po", | |
227 dgettext("po4a", "Can't read from %s: %s"), | |
228 $filename, $!); | |
229 } | |
230 | |
231 ## Read paragraphs line-by-line | |
232 my $pofile=""; | |
233 my $textline; | |
234 while (defined ($textline = <$fh>)) { | |
235 $pofile .= $textline; | |
236 } | |
237 # close INPUT | |
238 # or croak (sprintf(dgettext("po4a", | |
239 # "Can't close %s after reading: %s"), | |
240 # $filename,$!)."\n"); | |
241 | |
242 my $linenum=0; | |
243 | |
244 foreach my $msg (split (/\n\n/,$pofile)) { | |
245 my ($msgid,$msgstr,$comment,$automatic,$reference,$flags,$buffer); | |
246 my ($msgid_plural, $msgstr_plural); | |
247 foreach my $line (split (/\n/,$msg)) { | |
248 $linenum++; | |
249 if ($line =~ /^#\. ?(.*)$/) { # Automatic comment | |
250 $automatic .= (defined($automatic) ? "\n" : "").$1; | |
251 | |
252 } elsif ($line =~ /^#: ?(.*)$/) { # reference | |
253 $reference .= (defined($reference) ? "\n" : "").$1; | |
254 | |
255 } elsif ($line =~ /^#, ?(.*)$/) { # flags | |
256 $flags .= (defined($flags) ? "\n" : "").$1; | |
257 | |
258 } elsif ($line =~ /^#(.*)$/) { # Translator comments | |
259 $comment .= (defined($comment) ? "\n" : "").($1||""); | |
260 | |
261 } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid | |
262 $buffer = $1; | |
263 | |
264 } elsif ($line =~ /^msgid_plural (".*")$/) { | |
265 # begin of msgid_plural, end of msgid | |
266 | |
267 $msgid = $buffer; | |
268 $buffer = $1; | |
269 | |
270 } elsif ($line =~ /^msgstr (".*")$/) { | |
271 # begin of msgstr, end of msgid | |
272 | |
273 $msgid = $buffer; | |
274 $buffer = "$1"; | |
275 | |
276 } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) { | |
277 # begin of msgstr[x], end of msgid_plural or msgstr[x-1] | |
278 | |
279 # Note: po4a cannot uses plural forms | |
280 # (no integer to use the plural form) | |
281 # * drop the msgstr[x] where x >= 2 | |
282 # * use msgstr[0] as the translation of msgid | |
283 # * use msgstr[1] as the translation of msgid_plural | |
284 | |
285 if ($1 eq "0") { | |
286 $msgid_plural = $buffer; | |
287 $buffer = "$2"; | |
288 } elsif ($1 eq "1") { | |
289 $msgstr = $buffer; | |
290 $buffer = "$2"; | |
291 } elsif ($1 eq "2") { | |
292 $msgstr_plural = $buffer; | |
293 warn wrap_ref_mod("$filename:$linenum", | |
294 "po4a::po", | |
295 dgettext("po4a", "Messages with more than 2 plural forms are not supported.")); | |
296 } | |
297 } elsif ($line =~ /^(".*")$/) { | |
298 # continuation of a line | |
299 $buffer .= "\n$1"; | |
300 | |
301 } else { | |
302 warn wrap_ref_mod("$filename:$linenum", | |
303 "po4a::po", | |
304 dgettext("po4a", "Strange line: -->%s<--"), | |
305 $line); | |
306 } | |
307 } | |
308 $linenum++; | |
309 if (defined $msgid_plural) { | |
310 $msgstr_plural=$buffer; | |
311 | |
312 $msgid = unquote_text($msgid) if (defined($msgid)); | |
313 $msgstr = unquote_text($msgstr) if (defined($msgstr)); | |
314 | |
315 $self->push_raw ('msgid' => $msgid, | |
316 'msgstr' => $msgstr, | |
317 'reference' => $reference, | |
318 'flags' => $flags, | |
319 'comment' => $comment, | |
320 'automatic' => $automatic, | |
321 'plural' => 0); | |
322 | |
323 $msgid_plural = unquote_text($msgid_plural) | |
324 if (defined($msgid_plural)); | |
325 $msgstr_plural = unquote_text($msgstr_plural) | |
326 if (defined($msgstr_plural)); | |
327 | |
328 $self->push_raw ('msgid' => $msgid_plural, | |
329 'msgstr' => $msgstr_plural, | |
330 'reference' => $reference, | |
331 'flags' => $flags, | |
332 'comment' => $comment, | |
333 'automatic' => $automatic, | |
334 'plural' => 1); | |
335 } else { | |
336 $msgstr=$buffer; | |
337 | |
338 $msgid = unquote_text($msgid) if (defined($msgid)); | |
339 $msgstr = unquote_text($msgstr) if (defined($msgstr)); | |
340 | |
341 $self->push_raw ('msgid' => $msgid, | |
342 'msgstr' => $msgstr, | |
343 'reference' => $reference, | |
344 'flags' => $flags, | |
345 'comment' => $comment, | |
346 'automatic' => $automatic); | |
347 } | |
348 } | |
349 } | |
350 | |
351 =item write($) | |
352 | |
353 Writes the current catalog to the given file. | |
354 | |
355 =cut | |
356 | |
357 sub write{ | |
358 my $self=shift; | |
359 my $filename=shift | |
360 or croak dgettext("po4a","Can't write to a file without filename")."\n"; | |
361 | |
362 my $fh; | |
363 if ($filename eq '-') { | |
364 $fh=\*STDOUT; | |
365 } else { | |
366 # make sure the directory in which we should write the localized | |
367 # file exists | |
368 my $dir = $filename; | |
369 if ($dir =~ m|/|) { | |
370 $dir =~ s|/[^/]*$||; | |
371 | |
372 File::Path::mkpath($dir, 0, 0755) # Croaks on error | |
373 if (length ($dir) && ! -e $dir); | |
374 } | |
375 open $fh,">$filename" | |
376 or croak wrap_mod("po4a::po", | |
377 dgettext("po4a", "Can't write to %s: %s"), | |
378 $filename, $!); | |
379 } | |
380 | |
381 print $fh "".format_comment($self->{header_comment},"") | |
382 if defined($self->{header_comment}) && length($self->{header_comment}); | |
383 | |
384 print $fh "msgid \"\"\n"; | |
385 print $fh "msgstr ".quote_text($self->{header})."\n\n"; | |
386 | |
387 | |
388 my $buf_msgstr_plural; # USed to keep the first msgstr of plural forms | |
389 my $first=1; | |
390 foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=> | |
391 ($self->{po}{"$b"}{'pos'}) | |
392 } keys %{$self->{po}}) { | |
393 my $output=""; | |
394 | |
395 if ($first) { | |
396 $first=0; | |
397 } else { | |
398 $output .= "\n"; | |
399 } | |
400 | |
401 $output .= format_comment($self->{po}{$msgid}{'comment'},"") | |
402 if defined($self->{po}{$msgid}{'comment'}) | |
403 && length ($self->{po}{$msgid}{'comment'}); | |
404 if ( defined($self->{po}{$msgid}{'automatic'}) | |
405 && length ($self->{po}{$msgid}{'automatic'})) { | |
406 foreach my $comment (split(/\\n/,$self->{po}{$msgid}{'automatic'})) | |
407 { | |
408 $output .= format_comment($comment, ". ") | |
409 } | |
410 } | |
411 $output .= format_comment($self->{po}{$msgid}{'type'},". type: ") | |
412 if defined($self->{po}{$msgid}{'type'}) | |
413 && length ($self->{po}{$msgid}{'type'}); | |
414 $output .= format_comment($self->{po}{$msgid}{'reference'},": ") | |
415 if defined($self->{po}{$msgid}{'reference'}) | |
416 && length ($self->{po}{$msgid}{'reference'}); | |
417 $output .= "#, ". join(", ", sort split(/\s+/,$self->{po}{$msgid}{'flags'}))."\n" | |
418 if defined($self->{po}{$msgid}{'flags'}) | |
419 && length ($self->{po}{$msgid}{'flags'}); | |
420 | |
421 if (exists $self->{po}{$msgid}{'plural'}) { | |
422 if ($self->{po}{$msgid}{'plural'} == 0) { | |
423 if ($self->get_charset =~ /^utf-8$/i) { | |
424 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'}); | |
425 $msgid = Encode::decode_utf8($msgid); | |
426 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n"); | |
427 $buf_msgstr_plural = Encode::encode_utf8("msgstr[0] ".quote_text($msgstr)."\n"); | |
428 } else { | |
429 $output = "msgid ".quote_text($msgid)."\n"; | |
430 $buf_msgstr_plural = "msgstr[0] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n"; | |
431 } | |
432 } elsif ($self->{po}{$msgid}{'plural'} == 1) { | |
433 # TODO: there may be only one plural form | |
434 if ($self->get_charset =~ /^utf-8$/i) { | |
435 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'}); | |
436 $msgid = Encode::decode_utf8($msgid); | |
437 $output = Encode::encode_utf8("msgid_plural ".quote_text($msgid)."\n"); | |
438 $output .= $buf_msgstr_plural; | |
439 $output .= Encode::encode_utf8("msgstr[1] ".quote_text($msgstr)."\n"); | |
440 $buf_msgstr_plural = ""; | |
441 } else { | |
442 $output = "msgid_plural ".quote_text($msgid)."\n"; | |
443 $output .= $buf_msgstr_plural; | |
444 $output .= "msgstr[1] ".quote_text($self->{po}{$msgid}{'msgstr'})."\n"; | |
445 } | |
446 } else { | |
447 die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms.")); | |
448 } | |
449 } else { | |
450 if ($self->get_charset =~ /^utf-8$/i) { | |
451 my $msgstr = Encode::decode_utf8($self->{po}{$msgid}{'msgstr'}); | |
452 $msgid = Encode::decode_utf8($msgid); | |
453 $output .= Encode::encode_utf8("msgid ".quote_text($msgid)."\n"); | |
454 $output .= Encode::encode_utf8("msgstr ".quote_text($msgstr)."\n"); | |
455 } else { | |
456 $output .= "msgid ".quote_text($msgid)."\n"; | |
457 $output .= "msgstr ".quote_text($self->{po}{$msgid}{'msgstr'})."\n"; | |
458 } | |
459 } | |
460 | |
461 print $fh $output; | |
462 } | |
463 # print STDERR "$fh"; | |
464 # if ($filename ne '-') { | |
465 # close $fh | |
466 # or croak (sprintf(dgettext("po4a", | |
467 # "Can't close %s after writing: %s\n"), | |
468 # $filename,$!)); | |
469 # } | |
470 } | |
471 | |
472 =item write_if_needed($$) | |
473 | |
474 Like write, but if the PO or POT file already exists, the object will be | |
475 written in a temporary file which will be compared with the existing file | |
476 to check that the update is needed (this avoids to change a POT just to | |
477 update a line reference or the POT-Creation-Date field). | |
478 | |
479 =cut | |
480 | |
481 sub move_po_if_needed { | |
482 my ($new_po, $old_po, $backup) = (shift, shift, shift); | |
483 my $diff; | |
484 | |
485 if (-e $old_po) { | |
486 my $diff_ignore = "-I'^#:' " | |
487 ."-I'^\"POT-Creation-Date:' " | |
488 ."-I'^\"PO-Revision-Date:'"; | |
489 $diff = qx(diff -q $diff_ignore $old_po $new_po); | |
490 if ( $diff eq "" ) { | |
491 unlink $new_po | |
492 or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."), | |
493 $new_po, $!); | |
494 # touch the old PO | |
495 my ($atime, $mtime) = (time,time); | |
496 utime $atime, $mtime, $old_po; | |
497 } else { | |
498 if ($backup) { | |
499 copy $old_po, $old_po."~" | |
500 or die wrap_msg(dgettext("po4a","Can't copy %s to %s: %s."), | |
501 $old_po, $old_po."~", $!); | |
502 } else { | |
503 } | |
504 move $new_po, $old_po | |
505 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."), | |
506 $new_po, $old_po, $!); | |
507 } | |
508 } else { | |
509 move $new_po, $old_po | |
510 or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."), | |
511 $new_po, $old_po, $!); | |
512 } | |
513 } | |
514 | |
515 sub write_if_needed { | |
516 my $self=shift; | |
517 my $filename=shift | |
518 or croak dgettext("po4a","Can't write to a file without filename")."\n"; | |
519 | |
520 if (-e $filename) { | |
521 my ($tmp_filename); | |
522 (undef,$tmp_filename)=File::Temp->tempfile($filename."XXXX", | |
523 DIR => "/tmp", | |
524 OPEN => 0, | |
525 UNLINK => 0); | |
526 $self->write($tmp_filename); | |
527 move_po_if_needed($tmp_filename, $filename); | |
528 } else { | |
529 $self->write($filename); | |
530 } | |
531 } | |
532 | |
533 =item gettextize($$) | |
534 | |
535 This function produces one translated message catalog from two catalogs, an | |
536 original and a translation. This process is described in L<po4a(7)|po4a.7>, | |
537 section I<Gettextization: how does it work?>. | |
538 | |
539 =cut | |
540 | |
541 sub gettextize { | |
542 my $this = shift; | |
543 my $class = ref($this) || $this; | |
544 my ($poorig,$potrans)=(shift,shift); | |
545 | |
546 my $pores=Locale::Po4a::Po->new(); | |
547 | |
548 my $please_fail = 0; | |
549 my $toobad = dgettext("po4a", | |
550 "\nThe gettextization failed (once again). Don't give up, ". | |
551 "gettextizing is a subtle art, but this is only needed once ". | |
552 "to convert a project to the gorgeous luxus offered by po4a ". | |
553 "to translators.". | |
554 "\nPlease refer to the po4a(7) documentation, the section ". | |
555 "\"HOWTO convert a pre-existing translation to po4a?\" ". | |
556 "contains several hints to help you in your task"); | |
557 | |
558 # Don't fail right now when the entry count does not match. Instead, give | |
559 # it a try so that the user can see where we fail (which is probably where | |
560 # the problem is). | |
561 if ($poorig->count_entries_doc() > $potrans->count_entries_doc()) { | |
562 warn wrap_mod("po4a gettextize", dgettext("po4a", | |
563 "Original has more strings than the translation (%d>%d). ". | |
564 "Please fix it by editing the translated version to add ". | |
565 "some dummy entry."), | |
566 $poorig->count_entries_doc(), | |
567 $potrans->count_entries_doc()); | |
568 $please_fail = 1; | |
569 } elsif ($poorig->count_entries_doc() < $potrans->count_entries_doc()) { | |
570 warn wrap_mod("po4a gettextize", dgettext("po4a", | |
571 "Original has less strings than the translation (%d<%d). ". | |
572 "Please fix it by removing the extra entry from the ". | |
573 "translated file. You may need an addendum (cf po4a(7)) ". | |
574 "to reput the chunk in place after gettextization. A ". | |
575 "possible cause is that a text duplicated in the original ". | |
576 "is not translated the same way each time. Remove one of ". | |
577 "the translations, and you're fine."), | |
578 $poorig->count_entries_doc(), | |
579 $potrans->count_entries_doc()); | |
580 $please_fail = 1; | |
581 } | |
582 | |
583 if ( $poorig->get_charset =~ /^utf-8$/i ) { | |
584 $potrans->to_utf8; | |
585 $pores->set_charset("utf-8"); | |
586 } else { | |
587 if ($potrans->get_charset eq "CHARSET") { | |
588 $pores->set_charset("ascii"); | |
589 } else { | |
590 $pores->set_charset($potrans->get_charset); | |
591 } | |
592 } | |
593 print "Po character sets:\n". | |
594 " original=".$poorig->get_charset."\n". | |
595 " translated=".$potrans->get_charset."\n". | |
596 " result=".$pores->get_charset."\n" | |
597 if $debug{'encoding'}; | |
598 | |
599 for (my ($o,$t)=(0,0) ; | |
600 $o<$poorig->count_entries_doc() && $t<$potrans->count_entries_doc(); | |
601 $o++,$t++) { | |
602 # | |
603 # Extract some informations | |
604 | |
605 my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t)); | |
606 # print STDERR "Matches [[$orig]]<<$trans>>\n"; | |
607 | |
608 my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'}, | |
609 $potrans->{po}{$trans}{'reference'}); | |
610 my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'}, | |
611 $potrans->{po}{$trans}{'type'}); | |
612 | |
613 # | |
614 # Make sure the type of both string exist | |
615 # | |
616 die wrap_mod("po4a gettextize", | |
617 "Internal error: type of original string number %s ". | |
618 "isn't provided", $o) | |
619 if ($typeorig eq ''); | |
620 | |
621 die wrap_mod("po4a gettextize", | |
622 "Internal error: type of translated string number %s ". | |
623 "isn't provided", $o) | |
624 if ($typetrans eq ''); | |
625 | |
626 # | |
627 # Make sure both type are the same | |
628 # | |
629 if ($typeorig ne $typetrans){ | |
630 $pores->write("gettextization.failed.po"); | |
631 die wrap_msg(dgettext("po4a", | |
632 "po4a gettextization: Structure disparity between ". | |
633 "original and translated files:\n". | |
634 "msgid (at %s) is of type '%s' while\n". | |
635 "msgstr (at %s) is of type '%s'.\n". | |
636 "Original text: %s\n". | |
637 "Translated text: %s\n". | |
638 "(result so far dumped to gettextization.failed.po)"). | |
639 "%s", | |
640 $reforig, $typeorig, | |
641 $reftrans, $typetrans, | |
642 $orig, | |
643 $trans, | |
644 $toobad); | |
645 } | |
646 | |
647 # | |
648 # Push the entry | |
649 # | |
650 my $flags; | |
651 if (defined $poorig->{po}{$orig}{'flags'}) { | |
652 $flags = $poorig->{po}{$orig}{'flags'}." fuzzy"; | |
653 } else { | |
654 $flags = "fuzzy"; | |
655 } | |
656 $pores->push_raw('msgid' => $orig, | |
657 'msgstr' => $trans, | |
658 'flags' => $flags, | |
659 'type' => $typeorig, | |
660 'reference' => $reforig, | |
661 'conflict' => 1, | |
662 'transref' => $potrans->{po}{$trans}{'reference'}) | |
663 unless (defined($pores->{po}{$orig}) | |
664 and ($pores->{po}{$orig}{'msgstr'} eq $trans)) | |
665 # FIXME: maybe we should be smarter about what reference should be | |
666 # sent to push_raw. | |
667 } | |
668 | |
669 # make sure we return a useful error message when entry count differ | |
670 die "$toobad\n" if $please_fail; | |
671 | |
672 return $pores; | |
673 } | |
674 | |
675 =item filter($) | |
676 | |
677 This function extracts a catalog from an existing one. Only the entries having | |
678 a reference in the given file will be placed in the resulting catalog. | |
679 | |
680 This function parses its argument, converts it to a perl function definition, | |
681 eval this definition and filter the fields for which this function returns | |
682 true. | |
683 | |
684 I love perl sometimes ;) | |
685 | |
686 =cut | |
687 | |
688 sub filter { | |
689 my $self=shift; | |
690 our $filter=shift; | |
691 | |
692 my $res; | |
693 $res = Locale::Po4a::Po->new(); | |
694 | |
695 # Parse the filter | |
696 our $code="sub apply { return "; | |
697 our $pos=0; | |
698 our $length = length $filter; | |
699 | |
700 # explode chars to parts. How to subscript a string in Perl? | |
701 our @filter = split(//,$filter); | |
702 | |
703 sub gloups { | |
704 my $fmt=shift; | |
705 my $space = ""; | |
706 for (1..$pos){ | |
707 $space .= ' '; | |
708 } | |
709 die wrap_msg("$fmt\n$filter\n$space^ HERE"); | |
710 } | |
711 sub showmethecode { | |
712 return unless $debug{'filter'}; | |
713 my $fmt=shift; | |
714 my $space=""; | |
715 for (1..$pos){ | |
716 $space .= ' '; | |
717 } | |
718 print STDERR "$filter\n$space^ $fmt\n";#"$code\n"; | |
719 } | |
720 | |
721 # I dream of a lex in perl :-/ | |
722 sub parse_expression { | |
723 showmethecode("Begin expression") | |
724 if $debug{'filter'}; | |
725 | |
726 gloups("Begin of expression expected, got '%s'",$filter[$pos]) | |
727 unless ($filter[$pos] eq '('); | |
728 $pos ++; # pass the '(' | |
729 if ($filter[$pos] eq '&') { | |
730 # AND | |
731 $pos++; | |
732 showmethecode("Begin of AND") | |
733 if $debug{'filter'}; | |
734 $code .= "("; | |
735 while (1) { | |
736 gloups ("Unfinished AND statement.") | |
737 if ($pos == $length); | |
738 parse_expression(); | |
739 if ($filter[$pos] eq '(') { | |
740 $code .= " && "; | |
741 } elsif ($filter[$pos] eq ')') { | |
742 last; # do not eat that char | |
743 } else { | |
744 gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]); | |
745 } | |
746 } | |
747 $code .= ")"; | |
748 } elsif ($filter[$pos] eq '|') { | |
749 # OR | |
750 $pos++; | |
751 $code .= "("; | |
752 while (1) { | |
753 gloups("Unfinished OR statement.") | |
754 if ($pos == $length); | |
755 parse_expression(); | |
756 if ($filter[$pos] eq '(') { | |
757 $code .= " || "; | |
758 } elsif ($filter[$pos] eq ')') { | |
759 last; # do not eat that char | |
760 } else { | |
761 gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]); | |
762 } | |
763 } | |
764 $code .= ")"; | |
765 } elsif ($filter[$pos] eq '!') { | |
766 # NOT | |
767 $pos++; | |
768 $code .= "(!"; | |
769 gloups("Missing sub-expression in NOT statement.") | |
770 if ($pos == $length); | |
771 parse_expression(); | |
772 $code .= ")"; | |
773 } else { | |
774 # must be an equal. Let's get field and argument | |
775 my ($field,$arg,$done); | |
776 $field = substr($filter,$pos); | |
777 gloups("EQ statement contains no '=' or invalid field name") | |
778 unless ($field =~ /([a-z]*)=/i); | |
779 $field = lc($1); | |
780 $pos += (length $field) + 1; | |
781 | |
782 # check that we've got a valid field name, | |
783 # and the number it referes to | |
784 # DO NOT CHANGE THE ORDER | |
785 my @names=qw(msgid msgstr reference flags comment automatic); | |
786 my $fieldpos; | |
787 for ($fieldpos = 0; | |
788 $fieldpos < scalar @names && $field ne $names[$fieldpos]; | |
789 $fieldpos++) {} | |
790 gloups("Invalid field name: %s",$field) | |
791 if $fieldpos == scalar @names; # not found | |
792 | |
793 # Now, get the argument value. It has to be between quotes, | |
794 # which can be escaped | |
795 # We point right on the first char of the argument | |
796 # (first quote already eaten) | |
797 my $escaped = 0; | |
798 my $quoted = 0; | |
799 if ($filter[$pos] eq '"') { | |
800 $pos++; | |
801 $quoted = 1; | |
802 } | |
803 showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'") | |
804 if $debug{'filter'}; | |
805 | |
806 while (!$done) { | |
807 gloups("Unfinished EQ argument.") | |
808 if ($pos == $length); | |
809 | |
810 if ($quoted) { | |
811 if ($filter[$pos] eq '\\') { | |
812 if ($escaped) { | |
813 $arg .= '\\'; | |
814 $escaped = 0; | |
815 } else { | |
816 $escaped = 1; | |
817 } | |
818 } elsif ($escaped) { | |
819 if ($filter[$pos] eq '"') { | |
820 $arg .= '"'; | |
821 $escaped = 0; | |
822 } else { | |
823 gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]); | |
824 } | |
825 } else { | |
826 if ($filter[$pos] eq '"') { | |
827 $done = 1; | |
828 } else { | |
829 $arg .= $filter[$pos]; | |
830 } | |
831 } | |
832 } else { | |
833 if ($filter[$pos] eq ')') { | |
834 # counter the next ++ since we don't want to eat | |
835 # this char | |
836 $pos--; | |
837 $done = 1; | |
838 } else { | |
839 $arg .= $filter[$pos]; | |
840 } | |
841 } | |
842 $pos++; | |
843 } | |
844 # and now, add the code to check this equality | |
845 $code .= "(\$_[$fieldpos] =~ m/$arg/)"; | |
846 | |
847 } | |
848 showmethecode("End of expression") | |
849 if $debug{'filter'}; | |
850 gloups("Unfinished statement.") | |
851 if ($pos == $length); | |
852 gloups("End of expression expected, got '%s'",$filter[$pos]) | |
853 unless ($filter[$pos] eq ')'); | |
854 $pos++; | |
855 } | |
856 # And now, launch the beast, finish the function and use eval | |
857 # to construct this function. | |
858 # Ok, the lack of lexer is a fair price for the eval ;) | |
859 parse_expression(); | |
860 gloups("Garbage at the end of the expression") | |
861 if ($pos != $length); | |
862 $code .= "; }"; | |
863 print STDERR "CODE = $code\n" | |
864 if $debug{'filter'}; | |
865 eval $code; | |
866 die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@) | |
867 if $@; | |
868 | |
869 for (my $cpt=(0) ; | |
870 $cpt<$self->count_entries(); | |
871 $cpt++) { | |
872 | |
873 my ($msgid,$ref,$msgstr,$flags,$type,$comment,$automatic); | |
874 | |
875 $msgid = $self->msgid($cpt); | |
876 $ref=$self->{po}{$msgid}{'reference'}; | |
877 | |
878 $msgstr= $self->{po}{$msgid}{'msgstr'}; | |
879 $flags = $self->{po}{$msgid}{'flags'}; | |
880 $type = $self->{po}{$msgid}{'type'}; | |
881 $comment = $self->{po}{$msgid}{'comment'}; | |
882 $automatic = $self->{po}{$msgid}{'automatic'}; | |
883 | |
884 # DO NOT CHANGE THE ORDER | |
885 $res->push_raw('msgid' => $msgid, | |
886 'msgstr' => $msgstr, | |
887 'flags' => $flags, | |
888 'type' => $type, | |
889 'reference' => $ref, | |
890 'comment' => $comment, | |
891 'automatic' => $automatic) | |
892 if (apply($msgid,$msgstr,$ref,$flags,$comment,$automatic)); | |
893 } | |
894 # delete the apply subroutine | |
895 # otherwise it will be redefined. | |
896 undef &apply; | |
897 return $res; | |
898 } | |
899 | |
900 =item to_utf8() | |
901 | |
902 Recodes to utf-8 the po's msgstrs. Does nothing if the charset is not | |
903 specified in the po file ("CHARSET" value), or if it's already utf-8 or | |
904 ascii. | |
905 | |
906 =cut | |
907 | |
908 sub to_utf8 { | |
909 my $this = shift; | |
910 my $charset = $this->get_charset(); | |
911 | |
912 unless ($charset eq "CHARSET" or | |
913 $charset =~ /^ascii$/i or | |
914 $charset =~ /^utf-8$/i) { | |
915 foreach my $msgid ( keys %{$this->{po}} ) { | |
916 Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8"); | |
917 } | |
918 $this->set_charset("utf-8"); | |
919 } | |
920 } | |
921 | |
922 =back | |
923 | |
924 =head1 Functions to use a message catalog for translations | |
925 | |
926 =over 4 | |
927 | |
928 =item gettext($%) | |
929 | |
930 Request the translation of the string given as argument in the current catalog. | |
931 The function returns the original (untranslated) string if the string was not | |
932 found. | |
933 | |
934 After the string to translate, you can pass a hash of extra | |
935 arguments. Here are the valid entries: | |
936 | |
937 =over | |
938 | |
939 =item wrap | |
940 | |
941 boolean indicating whether we can consider that whitespaces in string are | |
942 not important. If yes, the function canonizes the string before looking for | |
943 a translation, and wraps the result. | |
944 | |
945 =item wrapcol | |
946 | |
947 The column at which we should wrap (default: 76). | |
948 | |
949 =back | |
950 | |
951 =cut | |
952 | |
953 sub gettext { | |
954 my $self=shift; | |
955 my $text=shift; | |
956 my (%opt)=@_; | |
957 my $res; | |
958 | |
959 return "" unless defined($text) && length($text); # Avoid returning the header. | |
960 my $validoption="reference wrap wrapcol"; | |
961 my %validoption; | |
962 | |
963 map { $validoption{$_}=1 } (split(/ /,$validoption)); | |
964 foreach (keys %opt) { | |
965 Carp::confess "internal error: unknown arg $_.\n". | |
966 "Here are the valid options: $validoption.\n" | |
967 unless $validoption{$_}; | |
968 } | |
969 | |
970 $text=canonize($text) | |
971 if ($opt{'wrap'}); | |
972 | |
973 my $esc_text=escape_text($text); | |
974 | |
975 $self->{gettextqueries}++; | |
976 | |
977 if ( defined $self->{po}{$esc_text} | |
978 and defined $self->{po}{$esc_text}{'msgstr'} | |
979 and length $self->{po}{$esc_text}{'msgstr'} | |
980 and ( not defined $self->{po}{$esc_text}{'flags'} | |
981 or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/)) { | |
982 | |
983 $self->{gettexthits}++; | |
984 $res = unescape_text($self->{po}{$esc_text}{'msgstr'}); | |
985 if (defined $self->{po}{$esc_text}{'plural'}) { | |
986 if ($self->{po}{$esc_text}{'plural'} eq "0") { | |
987 warn wrap_mod("po4a gettextize", dgettext("po4a", | |
988 "'%s' is the singular form of a message, ". | |
989 "po4a will use the msgstr[0] translation (%s)."), | |
990 $esc_text, $res); | |
991 } else { | |
992 warn wrap_mod("po4a gettextize", dgettext("po4a", | |
993 "'%s' is the plural form of a message, ". | |
994 "po4a will use the msgstr[1] translation (%s)."), | |
995 $esc_text, $res); | |
996 } | |
997 } | |
998 } else { | |
999 $res = $text; | |
1000 } | |
1001 | |
1002 if ($opt{'wrap'}) { | |
1003 if ($self->get_charset =~ /^utf-8$/i) { | |
1004 $res=Encode::decode_utf8($res); | |
1005 $res=wrap ($res, $opt{'wrapcol'} || 76); | |
1006 $res=Encode::encode_utf8($res); | |
1007 } else { | |
1008 $res=wrap ($res, $opt{'wrapcol'} || 76); | |
1009 } | |
1010 } | |
1011 # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n"; | |
1012 return $res; | |
1013 } | |
1014 | |
1015 =item stats_get() | |
1016 | |
1017 Returns statistics about the hit ratio of gettext since the last time that | |
1018 stats_clear() was called. Please note that it's not the same | |
1019 statistics than the one printed by msgfmt --statistic. Here, it's statistics | |
1020 about recent usage of the po file, while msgfmt reports the status of the | |
1021 file. Example of use: | |
1022 | |
1023 [some use of the po file to translate stuff] | |
1024 | |
1025 ($percent,$hit,$queries) = $pofile->stats_get(); | |
1026 print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n"; | |
1027 | |
1028 =cut | |
1029 | |
1030 sub stats_get() { | |
1031 my $self=shift; | |
1032 my ($h,$q)=($self->{gettexthits},$self->{gettextqueries}); | |
1033 my $p = ($q == 0 ? 100 : int($h/$q*10000)/100); | |
1034 | |
1035 # $p =~ s/\.00//; | |
1036 # $p =~ s/(\..)0/$1/; | |
1037 | |
1038 return ( $p,$h,$q ); | |
1039 } | |
1040 | |
1041 =item stats_clear() | |
1042 | |
1043 Clears the statistics about gettext hits. | |
1044 | |
1045 =cut | |
1046 | |
1047 sub stats_clear { | |
1048 my $self = shift; | |
1049 $self->{gettextqueries} = 0; | |
1050 $self->{gettexthits} = 0; | |
1051 } | |
1052 | |
1053 =back | |
1054 | |
1055 =head1 Functions to build a message catalog | |
1056 | |
1057 =over 4 | |
1058 | |
1059 =item push(%) | |
1060 | |
1061 Push a new entry at the end of the current catalog. The arguments should | |
1062 form a hash table. The valid keys are: | |
1063 | |
1064 =over 4 | |
1065 | |
1066 =item msgid | |
1067 | |
1068 the string in original language. | |
1069 | |
1070 =item msgstr | |
1071 | |
1072 the translation. | |
1073 | |
1074 =item reference | |
1075 | |
1076 an indication of where this string was found. Example: file.c:46 (meaning | |
1077 in 'file.c' at line 46). It can be a space-separated list in case of | |
1078 multiple occurrences. | |
1079 | |
1080 =item comment | |
1081 | |
1082 a comment added here manually (by the translators). The format here is free. | |
1083 | |
1084 =item automatic | |
1085 | |
1086 a comment which was automatically added by the string extraction | |
1087 program. See the I<--add-comments> option of the B<xgettext> program for | |
1088 more information. | |
1089 | |
1090 =item flags | |
1091 | |
1092 space-separated list of all defined flags for this entry. | |
1093 | |
1094 Valid flags are: c-text, python-text, lisp-text, elisp-text, librep-text, | |
1095 smalltalk-text, java-text, awk-text, object-pascal-text, ycp-text, | |
1096 tcl-text, wrap, no-wrap and fuzzy. | |
1097 | |
1098 See the gettext documentation for their meaning. | |
1099 | |
1100 =item type | |
1101 | |
1102 This is mostly an internal argument: it is used while gettextizing | |
1103 documents. The idea here is to parse both the original and the translation | |
1104 into a po object, and merge them, using one's msgid as msgid and the | |
1105 other's msgid as msgstr. To make sure that things get ok, each msgid in po | |
1106 objects are given a type, based on their structure (like "chapt", "sect1", | |
1107 "p" and so on in docbook). If the types of strings are not the same, that | |
1108 means that both files do not share the same structure, and the process | |
1109 reports an error. | |
1110 | |
1111 This information is written as automatic comment in the po file since this | |
1112 gives to translators some context about the strings to translate. | |
1113 | |
1114 =item wrap | |
1115 | |
1116 boolean indicating whether whitespaces can be mangled in cosmetic | |
1117 reformattings. If true, the string is canonized before use. | |
1118 | |
1119 This information is written to the po file using the 'wrap' or 'no-wrap' flag. | |
1120 | |
1121 =item wrapcol | |
1122 | |
1123 The column at which we should wrap (default: 76). | |
1124 | |
1125 This information is not written to the po file. | |
1126 | |
1127 =back | |
1128 | |
1129 =cut | |
1130 | |
1131 sub push { | |
1132 my $self=shift; | |
1133 my %entry=@_; | |
1134 | |
1135 my $validoption="wrap wrapcol type msgid msgstr automatic flags reference"; | |
1136 my %validoption; | |
1137 | |
1138 map { $validoption{$_}=1 } (split(/ /,$validoption)); | |
1139 foreach (keys %entry) { | |
1140 Carp::confess "internal error: unknown arg $_.\n". | |
1141 "Here are the valid options: $validoption.\n" | |
1142 unless $validoption{$_}; | |
1143 } | |
1144 | |
1145 unless ($entry{'wrap'}) { | |
1146 $entry{'flags'} .= " no-wrap"; | |
1147 } | |
1148 if (defined ($entry{'msgid'})) { | |
1149 $entry{'msgid'} = canonize($entry{'msgid'}) | |
1150 if ($entry{'wrap'}); | |
1151 | |
1152 $entry{'msgid'} = escape_text($entry{'msgid'}); | |
1153 } | |
1154 if (defined ($entry{'msgstr'})) { | |
1155 $entry{'msgstr'} = canonize($entry{'msgstr'}) | |
1156 if ($entry{'wrap'}); | |
1157 | |
1158 $entry{'msgstr'} = escape_text($entry{'msgstr'}); | |
1159 } | |
1160 | |
1161 $self->push_raw(%entry); | |
1162 } | |
1163 | |
1164 # The same as push(), but assuming that msgid and msgstr are already escaped | |
1165 sub push_raw { | |
1166 my $self=shift; | |
1167 my %entry=@_; | |
1168 my ($msgid,$msgstr,$reference,$comment,$automatic,$flags,$type,$transref)= | |
1169 ($entry{'msgid'},$entry{'msgstr'}, | |
1170 $entry{'reference'},$entry{'comment'},$entry{'automatic'}, | |
1171 $entry{'flags'},$entry{'type'},$entry{'transref'}); | |
1172 my $keep_conflict = $entry{'conflict'}; | |
1173 | |
1174 # print STDERR "Push_raw\n"; | |
1175 # print STDERR " msgid=>>>$msgid<<<\n" if $msgid; | |
1176 # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr; | |
1177 # Carp::cluck " flags=$flags\n" if $flags; | |
1178 | |
1179 return unless defined($entry{'msgid'}); | |
1180 | |
1181 #no msgid => header definition | |
1182 unless (length($entry{'msgid'})) { | |
1183 # if (defined($self->{header}) && $self->{header} =~ /\S/) { | |
1184 # warn dgettext("po4a","Redefinition of the header. ". | |
1185 # "The old one will be discarded\n"); | |
1186 # } FIXME: do that iff the header isn't the default one. | |
1187 $self->{header}=$msgstr; | |
1188 $self->{header_comment}=$comment; | |
1189 my $charset = $self->get_charset; | |
1190 if ($charset ne "CHARSET") { | |
1191 $self->{encoder}=find_encoding($charset); | |
1192 } else { | |
1193 $self->{encoder}=find_encoding("ascii"); | |
1194 } | |
1195 return; | |
1196 } | |
1197 | |
1198 if ($self->{options}{'porefs'} eq "none") { | |
1199 $reference = ""; | |
1200 } elsif ($self->{options}{'porefs'} eq "noline") { | |
1201 $reference =~ s/:[0-9]*/:1/g; | |
1202 } | |
1203 | |
1204 if (defined($self->{po}{$msgid})) { | |
1205 warn wrap_mod("po4a::po", | |
1206 dgettext("po4a","msgid defined twice: %s"), | |
1207 $msgid) | |
1208 if (0); # FIXME: put a verbose stuff | |
1209 if ( defined $msgstr | |
1210 and defined $self->{po}{$msgid}{'msgstr'} | |
1211 and $self->{po}{$msgid}{'msgstr'} ne $msgstr) { | |
1212 my $txt=quote_text($msgid); | |
1213 my ($first,$second)= | |
1214 (format_comment(". ",$self->{po}{$msgid}{'reference'}). | |
1215 quote_text($self->{po}{$msgid}{'msgstr'}), | |
1216 | |
1217 format_comment(". ",$reference). | |
1218 quote_text($msgstr)); | |
1219 | |
1220 if ($keep_conflict) { | |
1221 if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) { | |
1222 $msgstr = $self->{po}{$msgid}{'msgstr'}. | |
1223 "\\n#-#-#-#-# $transref #-#-#-#-#\\n". | |
1224 $msgstr; | |
1225 } else { | |
1226 $msgstr = "#-#-#-#-# ". | |
1227 $self->{po}{$msgid}{'transref'}. | |
1228 " #-#-#-#-#\\n". | |
1229 $self->{po}{$msgid}{'msgstr'}."\\n". | |
1230 "#-#-#-#-# $transref #-#-#-#-#\\n". | |
1231 $msgstr; | |
1232 } | |
1233 # Every msgid will have the same list of references. | |
1234 # Only keep the last list. | |
1235 $self->{po}{$msgid}{'reference'} = ""; | |
1236 } else { | |
1237 warn wrap_msg(dgettext("po4a", | |
1238 "Translations don't match for:\n". | |
1239 "%s\n". | |
1240 "-->First translation:\n". | |
1241 "%s\n". | |
1242 " Second translation:\n". | |
1243 "%s\n". | |
1244 " Old translation discarded."), | |
1245 $txt,$first,$second); | |
1246 } | |
1247 } | |
1248 } | |
1249 if (defined $transref) { | |
1250 $self->{po}{$msgid}{'transref'} = $transref; | |
1251 } | |
1252 if (defined $reference) { | |
1253 if (defined $self->{po}{$msgid}{'reference'}) { | |
1254 $self->{po}{$msgid}{'reference'} .= " ".$reference; | |
1255 } else { | |
1256 $self->{po}{$msgid}{'reference'} = $reference; | |
1257 } | |
1258 } | |
1259 $self->{po}{$msgid}{'msgstr'} = $msgstr; | |
1260 $self->{po}{$msgid}{'comment'} = $comment; | |
1261 $self->{po}{$msgid}{'automatic'} = $automatic; | |
1262 if (defined($self->{po}{$msgid}{'pos_doc'})) { | |
1263 $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++; | |
1264 } else { | |
1265 $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++; | |
1266 } | |
1267 unless (defined($self->{po}{$msgid}{'pos'})) { | |
1268 $self->{po}{$msgid}{'pos'} = $self->{count}++; | |
1269 } | |
1270 $self->{po}{$msgid}{'type'} = $type; | |
1271 $self->{po}{$msgid}{'plural'} = $entry{'plural'} | |
1272 if defined $entry{'plural'}; | |
1273 | |
1274 if (defined($flags)) { | |
1275 $flags = " $flags "; | |
1276 $flags =~ s/,/ /g; | |
1277 foreach my $flag (@known_flags) { | |
1278 if ($flags =~ /\s$flag\s/) { # if flag to be set | |
1279 unless ( defined($self->{po}{$msgid}{'flags'}) | |
1280 && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/) { | |
1281 # flag not already set | |
1282 if (defined $self->{po}{$msgid}{'flags'}) { | |
1283 $self->{po}{$msgid}{'flags'} .= " ".$flag; | |
1284 } else { | |
1285 $self->{po}{$msgid}{'flags'} = $flag; | |
1286 } | |
1287 } | |
1288 } | |
1289 } | |
1290 } | |
1291 # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n"; | |
1292 | |
1293 } | |
1294 | |
1295 =back | |
1296 | |
1297 =head1 Miscellaneous functions | |
1298 | |
1299 =over 4 | |
1300 | |
1301 =item count_entries() | |
1302 | |
1303 Returns the number of entries in the catalog (without the header). | |
1304 | |
1305 =cut | |
1306 | |
1307 sub count_entries($) { | |
1308 my $self=shift; | |
1309 return $self->{count}; | |
1310 } | |
1311 | |
1312 =item count_entries_doc() | |
1313 | |
1314 Returns the number of entries in document. If a string appears multiple times | |
1315 in the document, it will be counted multiple times | |
1316 | |
1317 =cut | |
1318 | |
1319 sub count_entries_doc($) { | |
1320 my $self=shift; | |
1321 return $self->{count_doc}; | |
1322 } | |
1323 | |
1324 =item msgid($) | |
1325 | |
1326 Returns the msgid of the given number. | |
1327 | |
1328 =cut | |
1329 | |
1330 sub msgid($$) { | |
1331 my $self=shift; | |
1332 my $num=shift; | |
1333 | |
1334 foreach my $msgid ( keys %{$self->{po}} ) { | |
1335 return $msgid if ($self->{po}{$msgid}{'pos'} eq $num); | |
1336 } | |
1337 return undef; | |
1338 } | |
1339 | |
1340 =item msgid_doc($) | |
1341 | |
1342 Returns the msgid with the given position in the document. | |
1343 | |
1344 =cut | |
1345 | |
1346 sub msgid_doc($$) { | |
1347 my $self=shift; | |
1348 my $num=shift; | |
1349 | |
1350 foreach my $msgid ( keys %{$self->{po}} ) { | |
1351 foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) { | |
1352 return $msgid if ($pos eq $num); | |
1353 } | |
1354 } | |
1355 return undef; | |
1356 } | |
1357 | |
1358 =item get_charset() | |
1359 | |
1360 Returns the character set specified in the po header. If it hasn't been | |
1361 set, it will return "CHARSET". | |
1362 | |
1363 =cut | |
1364 | |
1365 sub get_charset() { | |
1366 my $self=shift; | |
1367 | |
1368 $self->{header} =~ /charset=(.*?)[\s\\]/; | |
1369 | |
1370 if (defined $1) { | |
1371 return $1; | |
1372 } else { | |
1373 return "CHARSET"; | |
1374 } | |
1375 } | |
1376 | |
1377 =item set_charset($) | |
1378 | |
1379 This sets the character set of the po header to the value specified in its | |
1380 first argument. If you never call this function (and no file with a specified | |
1381 character set is read), the default value is left to "CHARSET". This value | |
1382 doesn't change the behavior of this module, it's just used to fill that field | |
1383 in the header, and to return it in get_charset(). | |
1384 | |
1385 =cut | |
1386 | |
1387 sub set_charset() { | |
1388 my $self=shift; | |
1389 | |
1390 my ($newchar,$oldchar); | |
1391 $newchar = shift; | |
1392 $oldchar = $self->get_charset(); | |
1393 | |
1394 $self->{header} =~ s/$oldchar/$newchar/; | |
1395 $self->{encoder}=find_encoding($newchar); | |
1396 } | |
1397 | |
1398 #----[ helper functions ]--------------------------------------------------- | |
1399 | |
1400 # transforme the string from its po file representation to the form which | |
1401 # should be used to print it | |
1402 sub unescape_text { | |
1403 my $text = shift; | |
1404 | |
1405 print STDERR "\nunescape [$text]====" if $debug{'escape'}; | |
1406 $text = join("",split(/\n/,$text)); | |
1407 $text =~ s/\\"/"/g; | |
1408 # unescape newlines | |
1409 # NOTE on \G: | |
1410 # The following regular expression introduce newlines. | |
1411 # Thus, ^ doesn't match all beginnings of lines. | |
1412 # \G is a zero-width assertion that matches the position | |
1413 # of the previous substitution with s///g. As every | |
1414 # substitution ends by a newline, it always matches a | |
1415 # position just after a newline. | |
1416 $text =~ s/( # $1: | |
1417 (\G|[^\\]) # beginning of the line or any char | |
1418 # different from '\' | |
1419 (\\\\)* # followed by any even number of '\' | |
1420 )\\n # and followed by an escaped newline | |
1421 /$1\n/sgx; # single string, match globally, allow comments | |
1422 # unescape tabulations | |
1423 $text =~ s/( # $1: | |
1424 (\G|[^\\])# beginning of the line or any char | |
1425 # different from '\' | |
1426 (\\\\)* # followed by any even number of '\' | |
1427 )\\t # and followed by an escaped tabulation | |
1428 /$1\t/mgx; # multilines string, match globally, allow comments | |
1429 # and unescape the escape character | |
1430 $text =~ s/\\\\/\\/g; | |
1431 print STDERR ">$text<\n" if $debug{'escape'}; | |
1432 | |
1433 return $text; | |
1434 } | |
1435 | |
1436 # transform the string to its representation as it should be written in po | |
1437 # files | |
1438 sub escape_text { | |
1439 my $text = shift; | |
1440 | |
1441 print STDERR "\nescape [$text]====" if $debug{'escape'}; | |
1442 $text =~ s/\\/\\\\/g; | |
1443 $text =~ s/"/\\"/g; | |
1444 $text =~ s/\n/\\n/g; | |
1445 $text =~ s/\t/\\t/g; | |
1446 print STDERR ">$text<\n" if $debug{'escape'}; | |
1447 | |
1448 return $text; | |
1449 } | |
1450 | |
1451 # put quotes around the string on each lines (without escaping it) | |
1452 # It does also normalize the text (ie, make sure its representation is wraped | |
1453 # on the 80th char, but without changing the meaning of the string) | |
1454 sub quote_text { | |
1455 my $string = shift; | |
1456 | |
1457 return '""' unless defined($string) && length($string); | |
1458 | |
1459 print STDERR "\nquote [$string]====" if $debug{'quote'}; | |
1460 # break lines on newlines, if any | |
1461 # see unescape_text for an explanation on \G | |
1462 $string =~ s/( # $1: | |
1463 (\G|[^\\]) # beginning of the line or any char | |
1464 # different from '\' | |
1465 (\\\\)* # followed by any even number of '\' | |
1466 \\n) # and followed by an escaped newline | |
1467 /$1\n/sgx; # single string, match globally, allow comments | |
1468 $string = wrap($string); | |
1469 my @string = split(/\n/,$string); | |
1470 $string = join ("\"\n\"",@string); | |
1471 $string = "\"$string\""; | |
1472 if (scalar @string > 1 && $string[0] ne '') { | |
1473 $string = "\"\"\n".$string; | |
1474 } | |
1475 | |
1476 print STDERR ">$string<\n" if $debug{'quote'}; | |
1477 return $string; | |
1478 } | |
1479 | |
1480 # undo the work of the quote_text function | |
1481 sub unquote_text { | |
1482 my $string = shift; | |
1483 print STDERR "\nunquote [$string]====" if $debug{'quote'}; | |
1484 $string =~ s/^""\\n//s; | |
1485 $string =~ s/^"(.*)"$/$1/s; | |
1486 $string =~ s/"\n"//gm; | |
1487 # Note: an even number of '\' could precede \\n, but I could not build a | |
1488 # document to test this | |
1489 $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm; | |
1490 $string =~ s|!!DUMMYPOPM!!|\\n|gm; | |
1491 print STDERR ">$string<\n" if $debug{'quote'}; | |
1492 return $string; | |
1493 } | |
1494 | |
1495 # canonize the string: write it on only one line, changing consecutive | |
1496 # whitespace to only one space. | |
1497 # Warning, it changes the string and should only be called if the string is | |
1498 # plain text | |
1499 sub canonize { | |
1500 my $text=shift; | |
1501 print STDERR "\ncanonize [$text]====" if $debug{'canonize'}; | |
1502 $text =~ s/^ *//s; | |
1503 $text =~ s/^[ \t]+/ /gm; | |
1504 # if ($text eq "\n"), it messed up the first string (header) | |
1505 $text =~ s/\n/ /gm if ($text ne "\n"); | |
1506 $text =~ s/([.)]) +/$1 /gm; | |
1507 $text =~ s/([^.)]) */$1 /gm; | |
1508 $text =~ s/ *$//s; | |
1509 print STDERR ">$text<\n" if $debug{'canonize'}; | |
1510 return $text; | |
1511 } | |
1512 | |
1513 # wraps the string. We don't use Text::Wrap since it mangles whitespace at | |
1514 # the end of splited line | |
1515 sub wrap { | |
1516 my $text=shift; | |
1517 return "0" if ($text eq '0'); | |
1518 my $col=shift || 76; | |
1519 my @lines=split(/\n/,"$text"); | |
1520 my $res=""; | |
1521 my $first=1; | |
1522 while (defined(my $line=shift @lines)) { | |
1523 if ($first && length($line) > $col - 10) { | |
1524 unshift @lines,$line; | |
1525 $first=0; | |
1526 next; | |
1527 } | |
1528 if (length($line) > $col) { | |
1529 my $pos=rindex($line," ",$col); | |
1530 while (substr($line,$pos-1,1) eq '.' && $pos != -1) { | |
1531 $pos=rindex($line," ",$pos-1); | |
1532 } | |
1533 if ($pos == -1) { | |
1534 # There are no spaces in the first $col chars, pick-up the | |
1535 # first space | |
1536 $pos = index($line," "); | |
1537 } | |
1538 if ($pos != -1) { | |
1539 my $end=substr($line,$pos+1); | |
1540 $line=substr($line,0,$pos+1); | |
1541 if ($end =~ s/^( +)//) { | |
1542 $line .= $1; | |
1543 } | |
1544 unshift @lines,$end; | |
1545 } | |
1546 } | |
1547 $first=0; | |
1548 $res.="$line\n"; | |
1549 } | |
1550 # Restore the original trailing spaces | |
1551 $res =~ s/\s+$//s; | |
1552 if ($text =~ m/(\s+)$/s) { | |
1553 $res .= $1; | |
1554 } | |
1555 return $res; | |
1556 } | |
1557 | |
1558 # outputs properly a '# ... ' line to be put in the po file | |
1559 sub format_comment { | |
1560 my $comment=shift; | |
1561 my $char=shift; | |
1562 my $result = "#". $char . $comment; | |
1563 $result =~ s/\n/\n#$char/gs; | |
1564 $result =~ s/^#$char$/#/gm; | |
1565 $result .= "\n"; | |
1566 return $result; | |
1567 } | |
1568 | |
1569 | |
1570 1; | |
1571 __END__ | |
1572 | |
1573 =back | |
1574 | |
1575 =head1 AUTHORS | |
1576 | |
1577 Denis Barbier <barbier@linuxfr.org> | |
1578 Martin Quinson (mquinson#debian.org) | |
1579 | |
1580 =cut |