Mercurial > pidgin
changeset 6238:6173354a64dc
[gaim-migrate @ 6731]
Adding a nice little utility for checking translated files. Also, fixed up
a few things in nl.po.
committer: Tailor Script <tailor@pidgin.im>
author | Christian Hammond <chipx86@chipx86.com> |
---|---|
date | Sat, 19 Jul 2003 20:57:58 +0000 |
parents | 6e2f46de5e38 |
children | 8d10cdfe1bb1 |
files | po/check_po.pl po/nl.po |
diffstat | 2 files changed, 393 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/po/check_po.pl Sat Jul 19 20:57:58 2003 +0000 @@ -0,0 +1,380 @@ +#!/usr/bin/perl -w +# +# check_po.pl - check po file translations for likely errors +# +# Written by David W. Pfitzner dwp@mso.anu.edu.au +# This script is hereby placed in the Public Domain. +# +# Various checks on po file translations: +# - printf-style format strings; +# - differences in trailing newlines; +# - empty (non-fuzzy) msgid; +# - likely whitespace errors on joining multi-line entries +# Ignores all fuzzy entries. +# +# Options: +# -x Don't do standard checks above (eg, just check one of below). +# -n Check newlines within strings; ie, that have equal numbers +# of newlines in msgstr and msgid. (Optional because this may +# happen legitimately.) +# -w Check leading whitespace. Sometimes whitespace is simply +# spacing (eg, for widget labels etc), or punctuation differences, +# so this may be ok. +# -W Check trailing whitespace. See -w above. +# -p Check trailing punctuation. +# -c Check capitalization of first non-whitespace character +# (only if [a-zA-Z]). +# -e Check on empty (c.q. new) msgstr +# +# Reads stdin (or filename args, via <>), writes any problems to stdout. +# +# Modified by Davide Pagnin nightmare@freeciv.it to support plural forms +# +# Version: 0.41 (2002-06-06) + +use strict; +use vars qw($opt_c $opt_n $opt_p $opt_w $opt_W $opt_x $opt_e); +use Getopt::Std; + +getopts('cnpwWxe'); + +# Globals, for current po entry: +# +# Note that msgid and msgstr have newlines represented by the +# two characters '\' and 'n' (and similarly for other escapes). + +my @amsgid; # lines exactly as in input +my @amsgstr; +my $entryline; # lineno where entry starts +my $msgid; # lines joined by "" +my $msgstr; +my $is_fuzzy; +my $is_cformat; +my $state; # From constant values below. +my $did_print; # Whether we have printed this entry, to + # print only once for multiple problems. 
# Parser states; $state always holds one of these values.
use constant S_LOOKING_START => 0;    # looking for start of entry
use constant S_DOING_MSGID   => 1;    # doing msgid part
use constant S_DOING_MSGSTR  => 2;    # doing msgstr part

# Initialize or reinitialize the per-entry globals so that parsing can
# start on a new po entry.  Takes no arguments.
sub new_entry {
    # Raw input lines collected for each half of the entry.
    @amsgid  = ();
    @amsgstr = ();

    # The joined strings are rebuilt by check_entry; forget the old ones.
    ($msgid, $msgstr) = (undef, undef);

    # Line number, flags and "already printed" marker all start at zero.
    ($entryline, $is_fuzzy, $is_cformat, $did_print) = (0, 0, 0, 0);

    $state = S_LOOKING_START;
}

# Nicely print either a "msgid" or a "msgstr".
#   first argument:       the keyword to print ("msgid" or "msgstr")
#   remaining arguments:  the string pieces, one per input line
# Each piece is printed inside its own pair of double quotes, on its
# own output line.
sub print_one {
    my ($name, @pieces) = @_;
    my $quoted = join("\"\n \"", @pieces);
    print " $name \"", $quoted, "\"\n";
}

# Print a problem (arguments as for print()).  The first problem reported
# for an entry is preceded by the entry itself: an "ENTRY:" label (with
# file name and line number unless reading stdin), then its msgid and
# msgstr.  $did_print remembers that this header has been shown, so any
# further problem for the same entry prints only its "***" line.
#
sub print_problem {
    if (!$did_print) {
        # $ARGV is "-" when input comes from stdin; no location then.
        my $where = "";
        if ($ARGV ne "-") {
            $where = " ($ARGV, line $entryline)";
        }
        print "ENTRY:", $where, "\n";
        print_one("msgid", @amsgid);
        print_one("msgstr", @amsgstr);
        $did_print = 1;
    }
    print "*** ", @_;
}

# Check final newline: probably, translations should end in a newline
# if and only if the original string does.  Skipped when -x is given.
# (See also check_trailing_whitespace and check_num_newlines below.)
#
sub check_trailing_newlines {
    return if $opt_x;

    # Newlines are stored as the two characters '\' and 'n', so look at
    # the last two characters of each string (or "" if it is shorter).
    my ($id_tail, $str_tail) =
        map { length($_) >= 2 ? substr($_, -2, 2) : "" } ($msgid, $msgstr);

    my $id_ends_nl  = ($id_tail  eq "\\n");
    my $str_ends_nl = ($str_tail eq "\\n");

    if ($id_ends_nl && !$str_ends_nl) {
        print_problem("Missing trailing newline\n");
    }
    if (!$id_ends_nl && $str_ends_nl) {
        print_problem("Extra trailing newline\n");
    }
}

# Check leading whitespace.  In general, any leading whitespace should
# be the same in msgstr and msgid -- but not always.
#
# Only runs when -w is given.  Compares the run of whitespace at the
# very start of msgid with the one at the start of msgstr (an absent
# run counts as the empty string) and reports any difference.
#
sub check_leading_whitespace {
    return unless $opt_w;

    my ($id)  = $msgid  =~ m/^(\s+)/;
    my ($str) = $msgstr =~ m/^(\s+)/;
    $id  = "" unless defined $id;
    $str = "" unless defined $str;

    if ($id ne $str) {
        print_problem("Different leading whitespace\n");
    }
}

# Check trailing whitespace.  In general, any trailing whitespace should
# be the same in msgstr and msgid -- but not always.
#
# Only runs when -W is given.  "Whitespace" here means real whitespace
# characters as well as the two-character escape '\' 'n'.
#
sub check_trailing_whitespace {
    return unless $opt_W;

    my ($id)  = $msgid  =~ m/((?:\s|\\n)+)$/;
    my ($str) = $msgstr =~ m/((?:\s|\\n)+)$/;
    $id  = "" unless defined $id;
    $str = "" unless defined $str;

    if ($id ne $str) {
        print_problem("Different trailing whitespace\n");
    }
}

# Check equal numbers of newlines.  In general ... etc.
#
# Only runs when -n is given.  Counts the occurrences of the escape
# sequence '\' 'n' in msgid and in msgstr and reports a mismatch.
#
sub check_num_newlines {
    return unless $opt_n;

    # m//g only returns the list of matches in list context; in scalar
    # context it merely says whether there was a match.  Assigning
    # through an empty list forces list context and yields the count.
    my $num_i = () = $msgid  =~ m/\\n/g;
    my $num_s = () = $msgstr =~ m/\\n/g;

    if ($num_i != $num_s) {
        print_problem("Mismatch in newline count\n");
    }
}

# Check capitalization of first non-whitespace character (for [a-zA-Z]
# only).  In general ... etc.
#
# Only runs when -c is given.  Nothing is reported unless both strings
# start (after optional whitespace) with an ASCII letter.
#
sub check_leading_capitalization {
    return unless $opt_c;

    my ($id)  = $msgid  =~ m/^\s*([a-zA-Z])/;
    my ($str) = $msgstr =~ m/^\s*([a-zA-Z])/;
    return unless defined($id) && defined($str);

    # Both are single ASCII letters, so "not lower case" means upper case.
    my $id_is_lower  = ($id  =~ /^[a-z]$/);
    my $str_is_lower = ($str =~ /^[a-z]$/);

    if ($id_is_lower xor $str_is_lower) {
        print_problem("Different leading capitalization\n");
    }
}

# Check trailing 'punctuation' characters (ignoring trailing whitespace).
# In general .. etc.
#
# Only runs when -p is given.  Looks for a run of punctuation characters
# at the end of each string, optionally followed by whitespace or '\' 'n'
# escapes.  Because the capture group sits inside the repetition, only
# the LAST character of that run is compared (so "..." and "." compare
# equal).  A string without such a run counts as "".
#
sub check_trailing_punctuation {
    return unless $opt_p;

    # Might want more characters:
    my $tail_re = qr/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/;

    my $id  = ($msgid  =~ $tail_re) ? $1 : "";
    my $str = ($msgstr =~ $tail_re) ? $1 : "";

    ##print "$id $str\n";
    if ($id ne $str) {
        print_problem("Different trailing punctuation\n");
    }
}

# Check that multiline strings have whitespace separation, since
# otherwise, eg:
#   msgstr "this is a multiline"
#   "string"
# expands to:
#   "this is a multilinestring"
#
# Skipped when -x is given.  msgid and msgstr are examined independently:
# a join is suspicious when the previous piece is non-empty and does not
# end in whitespace or '\' 'n', and the next piece does not start with
# whitespace or '\' 'n'.  At most one problem is reported per string.
#
sub check_whitespace_joins {
    return if $opt_x;

    foreach my $part (["msgid", \@amsgid], ["msgstr", \@amsgstr]) {
        my ($label, $pieces) = @$part;

        # The flag must start out clean for each string; otherwise a
        # problem found in msgid would be reported for msgstr as well.
        my $suspicious = 0;
        my $prev;

        foreach my $piece (@$pieces) {
            if (defined($prev)
                && length($prev)
                && $prev  !~ /\s$/
                && $prev  !~ /\\n$/
                && $piece !~ /^\s/
                && $piece !~ /^\\n/)
            {
                $suspicious = 1;
                last;
            }
            $prev = $piece;
        }

        if ($suspicious) {
            print_problem("Possible non-whitespace line-join problem in ",
                          $label, " \n");
        }
    }
}

# Check printf-style format entries.
# Non-trivial, because translation strings may use format specifiers
# out of order, or skip some specifiers etc.  Also gettext marks
# anything with '%' as cformat, though not all are.
#
# Only runs for entries flagged c-format, and not when -x is given.
#
# A "format" here is '%', then any run of digits, '.' and '$', then one
# lower-case letter; flags and upper-case conversions are not recognized.
# Each format found in msgstr is compared with a format from msgid:
#   - a positional format such as %2$s is compared (with the "2$" part
#     removed) against the 2nd format of msgid;
#   - any other format is compared against the msgid format at the same
#     index.
# A position that does not exist in msgid is reported as a "Format
# number mismatch"; differing formats are reported as "Format mismatch".
#
sub check_cformat {
    return unless $is_cformat;
    return if $opt_x;

    my $spec_re = qr/\%[0-9\.\$]*[a-z]/;
    my @iform = ($msgid  =~ /$spec_re/g);    # formats in the original
    my @sform = ($msgstr =~ /$spec_re/g);    # formats in the translation

    ##print join("::", @iform), "\n";
    ##print join("::", @sform), "\n";

    foreach my $js (0 .. $#sform) {
        my $sf_orig = $sform[$js];
        my $sf      = $sf_orig;
        my $j       = $js;                   # index into @iform

        # Positional format: strip "N$" and use N (1-based) as the index.
        if ($sf =~ s/^\%([0-9]+)\$(.*[a-z])$/\%$2/) {
            $j = $1 - 1;
        }

        # $j < 0 happens for "%0$...", which would otherwise silently
        # index @iform from the end.
        if ($j < 0 || $j > $#iform) {
            print_problem("Format number mismatch for $sf_orig [msgstr:",
                          ($js+1), "]\n");
            next;
        }

        my $expected = $iform[$j];
        if ($sf ne $expected) {
            print_problem("Format mismatch: $sf_orig [msgstr:", ($js+1), "]",
                          " vs $expected [msgid:", ($j+1), "]\n");
        }
    }
}

# Run all individual checks on current entry, reporting any problems.
#
# Fuzzy entries are skipped entirely.  Joins the collected input lines
# into $msgid and $msgstr first, since the individual checks read those.
# An empty msgstr ends the checks unless -e is given, in which case it
# is reported and the remaining checks still run.
sub check_entry {
    return if $is_fuzzy;

    $msgid  = join("", @amsgid);
    $msgstr = join("", @amsgstr);

    if (!$opt_x && length($msgid) == 0) {
        print_problem("Zero length msgid\n");
    }
    if (length($msgstr) == 0) {
        return unless $opt_e;
        print_problem("Untranslated msgid\n");
    }

    check_cformat();
    check_whitespace_joins();
    check_num_newlines();
    check_leading_whitespace();
    check_trailing_newlines();
    check_trailing_whitespace();
    check_leading_capitalization();
    check_trailing_punctuation();
}

# Main loop: read the po file(s) line by line, collecting one entry at
# a time and checking it once it is complete.

new_entry();

LINE:
while (<>) {
    if ( m{^\s*$} ) {
        # A blank line ends the current entry.  Always start afresh, so
        # that flags picked up from comments that were not followed by a
        # complete entry cannot leak into the next one.
        check_entry() if $state == S_DOING_MSGSTR;
        new_entry();
        next LINE;
    }
    if ( m{^\#, fuzzy} ) {
        $is_fuzzy = 1;
    }
    if ( m{^\#, .*(?<!no-)c-format} ) {
        # .* is because can have fuzzy, c-format; the look-behind keeps
        # the explicit "no-c-format" flag from being taken for c-format.
        $is_cformat = 1;
    }
    if ( m{^\#} ) {
        next LINE;
    }
    if ( m{^msgid \"(.*)\"$} ) {
        $entryline = $.;
        @amsgid = ($1);
        $state = S_DOING_MSGID;
        next LINE;
    }
    if ( m{^msgid_plural \"(.*)\"$} ) {
        # The plural form replaces the singular msgid collected so far.
        $entryline = $.;
        @amsgid = ($1);
        $state = S_DOING_MSGID;
        next LINE;
    }
    if ( m{^msgstr \"(.*)\"$} ) {
        @amsgstr = ($1);
        $state = S_DOING_MSGSTR;
        next LINE;
    }
    if ( m{^msgstr\[\d+\] \"(.*)\"$} ) {
        # Any plural index is accepted; each msgstr[N] replaces the
        # previous one, so the last plural form is the one checked.
        @amsgstr = ($1);
        $state = S_DOING_MSGSTR;
        next LINE;
    }
    if ( m{^\"(.*)\"$} ) {
        # Continuation line: belongs to whichever part is being read.
        if ($state == S_DOING_MSGID) {
            push @amsgid, $1;
        } elsif ($state == S_DOING_MSGSTR) {
            push @amsgstr, $1;
        } else {
            die "Looking at string $_ in bad state $state,";
        }
        next LINE;
    }
    die "Unexpected at $.: ", $_;
}
continue {
    # Runs after every line, including those left via "next LINE".
    # At the end of each input file: check the final entry (files need
    # not end in a blank line), reset the entry state, and close ARGV so
    # that $. restarts at 1 for the next file.
    if (eof) {
        check_entry() if $state == S_DOING_MSGSTR;
        new_entry();
        close(ARGV);
    }
}
--- a/po/nl.po Sat Jul 19 20:31:24 2003 +0000 +++ b/po/nl.po Sat Jul 19 20:57:58 2003 +0000 @@ -2017,7 +2017,7 @@ #: src/protocols/msn/msn.c:324 src/protocols/yahoo/yahoo.c:1126 #, c-format msgid "<b>Status:</b> %s" -msgstr "<B>Status:</B> %s" +msgstr "<b>Status:</b> %s" #: src/protocols/msn/msn.c:337 src/protocols/msn/msn.c:592 #: src/protocols/msn/state.c:32 @@ -4989,7 +4989,9 @@ msgid "" "\n" "<b>Account:</b> %s" -msgstr "<B>Status:</B> %s" +msgstr "" +"\n" +"<b>Account:</b> %s" #: src/gtkblist.c:908 #, c-format @@ -5001,7 +5003,9 @@ msgid "" "\n" "<b>Account:</b>" -msgstr "Account:" +msgstr "" +"\n" +"<b>Account:</b>" #: src/gtkblist.c:923 msgid "" @@ -5048,14 +5052,18 @@ msgid "" "\n" "<b>Status</b>: Awesome" -msgstr "<B>Status:</B> %s" +msgstr "" +"\n" +"<b>Status:</b>: Awesome" #: src/gtkblist.c:930 #, fuzzy msgid "" "\n" "<b>Status</b>: Rockin'" -msgstr "<b>Status:</b> " +msgstr "" +"\n" +"<b>Status:</b>: Rockin'" #: src/gtkblist.c:1227 #, c-format