Mercurial > pidgin
view po/check_po.pl @ 31373:6c660dc7cb6a
Moved the conversation attributes API and the IRC periodic WHO updates to i.p.p.next.minor, where they belong.
applied changes from 3de680fff7ddd1b00149657afb7f6cd833000a90
through 7ee5e1d431651ed2b1a54bc942d63f35580af55c
applied changes from e7c103fdfbc59bb2ca41a3c8813c4ff2847a673f
through 22937ab220c41cd0c4a3f9e21e3db687db80da75
applied changes from 22937ab220c41cd0c4a3f9e21e3db687db80da75
through cba010d1c097d4e6599f08276ed9d894710c1074
applied changes from a694289accbec14c593b3636ef1f626fd8279805
through 8a43e3ddd7adacb208afe2d7ee3ea983c95901be
author | Evan Schoenberg <evan.s@dreskin.net> |
---|---|
date | Mon, 21 Feb 2011 23:08:47 +0000 |
parents | 2046abd60e0a |
children |
line wrap: on
line source
#!/usr/bin/env perl
#
# check_po.pl  -  check po file translations for likely errors
#
# Written by David W. Pfitzner dwp@mso.anu.edu.au
# This script is hereby placed in the Public Domain.
#
# Various checks on po file translations:
# - printf-style format strings;
# - differences in trailing newlines;
# - empty (non-fuzzy) msgid;
# - likely whitespace errors on joining multi-line entries
# Ignores all fuzzy entries.
#
# Options:
#  -x  Don't do standard checks above (eg, just check one of below).
#  -n  Check newlines within strings; ie, that have equal numbers
#      of newlines in msgstr and msgid.  (Optional because this may
#      happen legitimately.)
#  -w  Check leading whitespace.  Sometimes whitespace is simply
#      spacing (eg, for widget labels etc), or punctuation differences,
#      so this may be ok.
#  -W  Check trailing whitespace.  See -w above.
#  -p  Check trailing punctuation.
#  -c  Check capitalization of first non-whitespace character
#      (only if [a-zA-Z]).
#  -e  Check on empty (c.q. new) msgstr
#
# Reads stdin (or filename args, via <>), writes any problems to stdout.
#
# Modified by Davide Pagnin nightmare@freeciv.it to support plural forms
#
# Version: 0.41  (2002-06-06)

# TODO: This script needs to be able to handle Farsi's %Id flag for
#       number format specifiers.  More information on how it works, see
#       http://www.gnu.org/software/hello/manual/gettext/c_002dformat.html
#       It's possible someone has already made this change... look around
#       for an updated version of this script.

use strict;
use warnings;

use Getopt::Std;

# One flag per command-line switch (see "Options" above).  These must be
# package variables rather than lexicals, because getopts() stores each
# switch in $opt_<letter> of the calling package.
our ($opt_c, $opt_n, $opt_p, $opt_w, $opt_W, $opt_x, $opt_e);

# Removes the recognised switches from @ARGV; whatever remains is read as
# input file names by the <> operator.
getopts('cnpwWxe');

# Globals, for current po entry:
#
# Note that msgid and msgstr have newlines represented by the
# two characters '\' and 'n' (and similarly for other escapes).

my @amsgid;      # lines exactly as in input
my @amsgstr;
my $entryline;   # lineno where entry starts
my $msgid;       # lines joined by ""
my $msgstr;
my $is_fuzzy;
my $is_cformat;
my $state;       # From constant values below.
my $did_print;   # Whether we have printed this entry, to
                 # print only once for multiple problems.

use constant S_LOOKING_START => 0;   # looking for start of entry
use constant S_DOING_MSGID   => 1;   # doing msgid part
use constant S_DOING_MSGSTR  => 2;   # doing msgstr part

# Initialize or reinitialize globals to prepare for new entry:
# empties the collected msgid/msgstr lines, clears the fuzzy/c-format
# flags and the "already printed" marker, and puts the parser back into
# the S_LOOKING_START state.
sub new_entry {
    @amsgid     = ();
    @amsgstr    = ();
    $msgid      = undef;
    $msgstr     = undef;
    $entryline  = 0;
    $is_fuzzy   = 0;
    $is_cformat = 0;
    $did_print  = 0;
    $state      = S_LOOKING_START;
    return;
}

# Nicely print either a "msgid" or "msgstr" (name is one of these)
# with given array of data: every element is shown as its own quoted
# line, the way it appeared in the po file.
sub print_one {
    my ($name, @lines) = @_;
    print " $name \"", join("\"\n \"", @lines), "\"\n";
    return;
}

# Print a problem (args like print()), preceded by entry unless
# we have already printed that: label, and msgid and msgstr.
# The file name and line number are left out of the label when the
# input is stdin ($ARGV is "-").
#
sub print_problem {
    my @message = @_;
    unless ($did_print) {
        print "ENTRY:", ($ARGV eq "-" ? "" : " ($ARGV, line $entryline)"), "\n";
        print_one("msgid", @amsgid);
        print_one("msgstr", @amsgstr);
        $did_print = 1;
    }
    print "*** ", @message;
    return;
}

# Return 1 if the po-escaped string ends in a newline escape, else 0.
# The backslash of the final '\n' must itself be unescaped, i.e. be
# preceded by an even number of backslashes: "foo\n" ends in a newline,
# whereas "foo\\n" ends in a literal backslash followed by the letter n.
sub _ends_with_newline {
    my ($text) = @_;
    return 0 unless defined $text;
    return ($text =~ /(?<!\\)(?:\\\\)*\\n\z/) ? 1 : 0;
}

# Check final newline: probably, translations should end in a newline
# if and only if the original string does.
# (See also check_trailing_whitespace and check_num_newlines below.)
# Part of the standard checks, so it is skipped under -x.
#
sub check_trailing_newlines {
    return if $opt_x;

    my $id_has_newline  = _ends_with_newline($msgid);
    my $str_has_newline = _ends_with_newline($msgstr);

    if ($id_has_newline && !$str_has_newline) {
        print_problem("Missing trailing newline\n");
    }
    if (!$id_has_newline && $str_has_newline) {
        print_problem("Extra trailing newline\n");
    }
    return;
}

# Check leading whitespace.  In general, any leading whitespace should
# be the same in msgstr and msgid -- but not always.
# Only runs when -w was given.
#
sub check_leading_whitespace {
    return unless $opt_w;

    my $id  = ($msgid  =~ m/^(\s+)/) ? $1 : "";
    my $str = ($msgstr =~ m/^(\s+)/) ? $1 : "";

    if ($id ne $str) {
        print_problem("Different leading whitespace\n");
    }
    return;
}

# Check trailing whitespace.
# In general, any trailing whitespace should
# be the same in msgstr and msgid -- but not always.
# Only runs when -W was given.  Escaped newlines (the two characters
# '\' and 'n') count as whitespace here.
#
sub check_trailing_whitespace {
    return unless $opt_W;

    my $id  = ($msgid  =~ m/((?:\s|\\n)+)$/) ? $1 : "";
    my $str = ($msgstr =~ m/((?:\s|\\n)+)$/) ? $1 : "";

    if ($id ne $str) {
        print_problem("Different trailing whitespace\n");
    }
    return;
}

# Check equal numbers of newlines.  In general ... etc.
# Only runs when -n was given.
#
sub check_num_newlines {
    return unless $opt_n;

    # Assigning the match list to () and then to a scalar yields the
    # number of matches.  A plain scalar-context m//g would only say
    # whether there is at least one newline, not how many.
    my $num_i = () = $msgid  =~ m(\\n)g;
    my $num_s = () = $msgstr =~ m(\\n)g;

    if ($num_i != $num_s) {
        print_problem("Mismatch in newline count\n");
    }
    return;
}

# Check capitalization of first non-whitespace character (for [a-zA-Z]
# only).  In general ... etc.
# Only runs when -c was given.  Nothing is reported unless both msgid
# and msgstr start (after optional whitespace) with an ASCII letter.
#
sub check_leading_capitalization {
    return unless $opt_c;

    my ($id, $str);
    if ($msgid  =~ m/^\s*([a-zA-Z])/) { $id  = $1; }
    if ($msgstr =~ m/^\s*([a-zA-Z])/) { $str = $1; }
    return unless defined($id) && defined($str);

    if (   ($id =~ /^[a-z]$/ && $str =~ /^[A-Z]$/)
        || ($id =~ /^[A-Z]$/ && $str =~ /^[a-z]$/)) {
        print_problem("Different leading capitalization\n");
    }
    return;
}

# Check trailing 'punctuation' characters (ignoring trailing whitespace).
# In general .. etc.
#
# Only runs when -p was given.  Note that the '+' sits outside the
# capture group, so of a run of punctuation characters only the last
# one is captured and compared.
#
sub check_trailing_punctuation {
    return unless $opt_p;

    # Might want more characters:
    my $id  = ($msgid  =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) ? $1 : "";
    my $str = ($msgstr =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) ? $1 : "";

    ##print "$id $str\n";
    if ($id ne $str) {
        print_problem("Different trailing punctuation\n");
    }
    return;
}

# Check that multiline strings have whitespace separation, since
# otherwise, eg:
#   msgstr "this is a multiline"
#   "string"
# expands to:
#   "this is a multilinestring"
#
# msgid and msgstr are examined independently; a problem is reported
# for a part when two consecutive lines of it meet without whitespace
# or an escaped newline on either side of the join.  An empty previous
# line never counts as a problem.  Part of the standard checks, so it is
# skipped under -x.
#
sub check_whitespace_joins {
    return if $opt_x;

    my @parts = ([ "msgid", \@amsgid ], [ "msgstr", \@amsgstr ]);

    foreach my $part (@parts) {
        my ($label, $lines) = @$part;

        # Per-part verdict: must start fresh for each part, otherwise a
        # problem found in msgid would be reported for msgstr as well.
        my $ok   = 1;
        my $prev = undef;

        PIECE: foreach my $line (@$lines) {
            if (defined($prev) && length($prev)
                && $prev !~ /\s$/ && $prev !~ /\\n$/
                && $line !~ /^\s/ && $line !~ /^\\n/) {
                $ok = 0;
                last PIECE;
            }
            $prev = $line;
        }

        if (!$ok) {
            print_problem("Possible non-whitespace line-join problem in ",
                          $label, " \n");
        }
    }
    return;
}

# Check printf-style format entries.
# Non-trivial, because translation strings may use format specifiers
# out of order, or skip some specifiers etc.  Also gettext marks
# anything with '%' as cformat, though not all are.
#
# Only runs for entries flagged c-format, and is skipped under -x.
# Each specifier found in msgstr is compared with the msgid specifier at
# the same position, or at position N when written as %N$...; msgid
# specifiers that msgstr does not use are not reported.  Literal "%%" is
# ignored, and the translation-only 'I' flag (as in "%Id", which asks for
# locale digits) is dropped before comparing.
#
sub check_cformat {
    return unless $is_cformat;
    return if $opt_x;

    # "%%" is matched explicitly so its second '%' cannot be mistaken for
    # the start of a specifier; it is then filtered out again.
    my @iform = grep { $_ ne '%%' }
        ($msgid =~ m/\%(?:\%|[0-9\.\$]*[a-z])/g);
    my @sform = grep { $_ ne '%%' }
        ($msgstr =~ m/\%(?:\%|(?:[0-9]+\$)?I?[0-9\.\$]*[a-z])/g);

    ##print join("::", @iform), "\n";
    ##print join("::", @sform), "\n";

    foreach my $js (0 .. $#sform) {     # index in sform
        my $sf_orig = $sform[$js];
        my $sf      = $sf_orig;

        # "%Id" / "%1$Id" must compare equal to "%d" / "%1$d".
        $sf =~ s/^(\%(?:[0-9]+\$)?)I/$1/;

        my $j = $js;                    # index into iform
        if ($sf =~ s/^\%([0-9]+)\$(.*[a-z])$/\%$2/) {
            $j = $1 - 1;
        }

        # $j < 0 happens for the invalid "%0$..."; without this guard it
        # would silently select the last msgid specifier.
        if ($j < 0 || $j > $#iform) {
            print_problem("Format number mismatch for $sf_orig [msgstr:",
                          ($js+1), "]\n");
            next;
        }

        my $if = $iform[$j];
        if ($sf ne $if) {
            print_problem("Format mismatch: $sf_orig [msgstr:", ($js+1), "]",
                          " vs $if [msgid:", ($j+1), "]\n");
        }
    }
    return;
}

# Run all individual checks on current entry, reporting any problems.
# Fuzzy entries are skipped entirely.  An empty msgstr is reported only
# under -e; without -e such an entry is not checked any further.
sub check_entry {
    return if $is_fuzzy;

    $msgid  = join("", @amsgid);
    $msgstr = join("", @amsgstr);

    unless ($opt_x) {
        if (length($msgid) == 0) {
            print_problem("Zero length msgid\n");
        }
    }
    if (length($msgstr) == 0) {
        return unless $opt_e;
        print_problem("Untranslated msgid\n");
    }

    check_cformat();
    check_whitespace_joins();
    check_num_newlines();
    check_leading_whitespace();
    check_trailing_newlines();
    check_trailing_whitespace();
    check_leading_capitalization();
    check_trailing_punctuation();
    return;
}

# Main loop: a small state machine over the input lines.  A blank line
# after a msgstr ends the entry and triggers the checks.  For plural
# entries, msgid_plural replaces the collected msgid and each msgstr[N]
# replaces the collected msgstr, so the checks see the last of each.
new_entry();

LINE: while (my $line = <>) {
    if ($line =~ m(^\s*$)) {
        if ($state == S_DOING_MSGSTR) {
            check_entry();
            new_entry();
        }
        next LINE;
    }
    if ($line =~ m(^\#, fuzzy)) {
        $is_fuzzy = 1;
    }
    # The flag may follow others ("#, fuzzy, c-format"), but it must be a
    # whole flag: "no-c-format" and "objc-format" do not count.
    if ($line =~ m(^\#,(?:.*,)?\s*c-format\b)) {
        $is_cformat = 1;
    }
    if ($line =~ m(^\#)) {
        next LINE;
    }
    if ($line =~ m(^msgid \"(.*)\"$)) {
        $entryline = $.;
        @amsgid    = ($1);
        $state     = S_DOING_MSGID;
        next LINE;
    }
    if ($line =~ m(^msgid_plural \"(.*)\"$)) {
        $entryline = $.;
        @amsgid    = ($1);
        $state     = S_DOING_MSGID;
        next LINE;
    }
    if ($line =~ m(^msgstr \"(.*)\"$)) {
        @amsgstr = ($1);
        $state   = S_DOING_MSGSTR;
        next LINE;
    }
    if ($line =~ m(^msgstr\[[0-5]\] \"(.*)\"$)) {
        @amsgstr = ($1);
        $state   = S_DOING_MSGSTR;
        next LINE;
    }
    if ($line =~ m(^\"(.*)\"$)) {
        if ($state == S_DOING_MSGID) {
            push @amsgid, $1;
        }
        elsif ($state == S_DOING_MSGSTR) {
            push @amsgstr, $1;
        }
        else {
            die "Looking at string $line in bad state $state,";
        }
        next LINE;
    }
    die "Unexpected at $.: ", $line;
}
continue {
    # End of the current input file (eof without parentheses, not eof()).
    if (eof) {
        # A file need not end in a blank line: check the pending entry
        # instead of silently dropping it.
        if ($state == S_DOING_MSGSTR) {
            check_entry();
        }
        new_entry();
        # Closing ARGV resets $. so that reported line numbers are
        # relative to each file rather than cumulative.
        close ARGV;
    }
}