Mercurial > pidgin
annotate po/check_po.pl @ 25886:a4a17fe80160
explicit merge of 'f7807039eeef499a403d638cbcb94060a1d33eea'
and '59d53e7606e3c4bd860b9fd6c786f61acf61476b'
author | Richard Laager <rlaager@wiktel.com> |
---|---|
date | Fri, 02 Jan 2009 22:17:09 +0000 |
parents | 72f90ea7ae34 |
children | a675137fc598 |
rev | line source |
---|---|
22015
72f90ea7ae34
Don't assume perl is at /usr/bin/perl, use /usr/bin/env instead. This is not
Richard Laager <rlaager@wiktel.com>
parents:
6238
diff
changeset
|
1 #!/usr/bin/env perl -w |
6238 | 2 # |
3 # check_po.pl - check po file translations for likely errors | |
4 # | |
5 # Written by David W. Pfitzner dwp@mso.anu.edu.au | |
6 # This script is hereby placed in the Public Domain. | |
7 # | |
8 # Various checks on po file translations: | |
9 # - printf-style format strings; | |
10 # - differences in trailing newlines; | |
11 # - empty (non-fuzzy) msgid; | |
12 # - likely whitespace errors on joining multi-line entries | |
13 # Ignores all fuzzy entries. | |
14 # | |
15 # Options: | |
16 # -x Don't do standard checks above (eg, just check one of below). | |
17 # -n Check newlines within strings; ie, that have equal numbers | |
18 # of newlines in msgstr and msgid. (Optional because this may | |
19 # happen legitimately.) | |
20 # -w Check leading whitespace. Sometimes whitespace is simply | |
21 # spacing (eg, for widget labels etc), or punctuation differences, | |
22 # so this may be ok. | |
23 # -W Check trailing whitespace. See -w above. | |
24 # -p Check trailing punctuation. | |
25 # -c Check capitalization of first non-whitespace character | |
26 # (only if [a-zA-Z]). | |
27 # -e Check on empty (c.q. new) msgstr | |
28 # | |
29 # Reads stdin (or filename args, via <>), writes any problems to stdout. | |
30 # | |
31 # Modified by Davide Pagnin nightmare@freeciv.it to support plural forms | |
32 # | |
33 # Version: 0.41 (2002-06-06) | |
34 | |
35 use strict; | |
36 use vars qw($opt_c $opt_n $opt_p $opt_w $opt_W $opt_x $opt_e); | |
37 use Getopt::Std; | |
38 | |
39 getopts('cnpwWxe'); | |
40 | |
41 # Globals, for current po entry: | |
42 # | |
43 # Note that msgid and msgstr have newlines represented by the | |
44 # two characters '\' and 'n' (and similarly for other escapes). | |
45 | |
46 my @amsgid; # lines exactly as in input | |
47 my @amsgstr; | |
48 my $entryline; # lineno where entry starts | |
49 my $msgid; # lines joined by "" | |
50 my $msgstr; | |
51 my $is_fuzzy; | |
52 my $is_cformat; | |
53 my $state; # From constant values below. | |
54 my $did_print; # Whether we have printed this entry, to | |
55 # print only once for multiple problems. | |
56 | |
57 use constant S_LOOKING_START => 0; # looking for start of entry | |
58 use constant S_DOING_MSGID => 1; # doing msgid part | |
59 use constant S_DOING_MSGSTR => 2; # doing msgstr part | |
60 | |
61 # Initialize or reinitalize globals to prepare for new entry: | |
62 sub new_entry { | |
63 @amsgid = (); | |
64 @amsgstr = (); | |
65 $msgid = undef; | |
66 $msgstr = undef; | |
67 $entryline = 0; | |
68 $is_fuzzy = 0; | |
69 $is_cformat = 0; | |
70 $did_print = 0; | |
71 $state = S_LOOKING_START; | |
72 } | |
73 | |
74 # Nicely print either a "msgid" or "msgstr" (name is one of these) | |
75 # with given array of data. | |
76 sub print_one { | |
77 my $name = shift; | |
78 print " $name \"", join("\"\n \"", @_), "\"\n"; | |
79 } | |
80 | |
81 # Print a problem (args like print()), preceeded by entry unless | |
82 # we have already printed that: label, and msgid and msgstr. | |
83 # | |
84 sub print_problem { | |
85 unless ($did_print) { | |
86 print "ENTRY:", ($ARGV eq "-" ? "" : " ($ARGV, line $entryline)"), "\n"; | |
87 print_one("msgid", @amsgid); | |
88 print_one("msgstr", @amsgstr); | |
89 $did_print = 1; | |
90 } | |
91 print "*** ", @_; | |
92 } | |
93 | |
94 # Check final newline: probably, translations should end in a newline | |
95 # if and only if the original string does. | |
96 # (See also check_trailing_whitespace and check_num_newlines below.) | |
97 # | |
98 sub check_trailing_newlines { | |
99 if ($opt_x) { return; } | |
100 | |
101 my ($ichar, $schar); | |
102 | |
103 $ichar = (length($msgid)>=2) ? substr($msgid, -2, 2) : ""; | |
104 $schar = (length($msgstr)>=2) ? substr($msgstr, -2, 2) : ""; | |
105 | |
106 if ($ichar eq "\\n" && $schar ne "\\n") { | |
107 print_problem "Missing trailing newline\n"; | |
108 } | |
109 if ($ichar ne "\\n" && $schar eq "\\n") { | |
110 print_problem "Extra trailing newline\n"; | |
111 } | |
112 | |
113 } | |
114 | |
115 # Check leading whitespace. In general, any leading whitespace should | |
116 # be the same in msgstr and msgid -- but not always. | |
117 # | |
118 sub check_leading_whitespace { | |
119 unless ($opt_w) { return; } | |
120 | |
121 my ($id, $str); | |
122 | |
123 if ($msgid =~ m/^(\s+)/) { | |
124 $id = $1; | |
125 } else { | |
126 $id = ""; | |
127 } | |
128 if ($msgstr =~ m/^(\s+)/) { | |
129 $str = $1; | |
130 } else { | |
131 $str = ""; | |
132 } | |
133 if ($id ne $str) { | |
134 print_problem "Different leading whitespace\n"; | |
135 } | |
136 } | |
137 | |
138 # Check trailing whitespace. In general, any trailing whitespace should | |
139 # be the same in msgstr and msgid -- but not always. | |
140 # | |
141 sub check_trailing_whitespace { | |
142 unless ($opt_W) { return; } | |
143 | |
144 my ($id, $str); | |
145 | |
146 if ($msgid =~ m/((?:\s|\\n)+)$/) { | |
147 $id = $1; | |
148 } else { | |
149 $id = ""; | |
150 } | |
151 if ($msgstr =~ m/((?:\s|\\n)+)$/) { | |
152 $str = $1; | |
153 } else { | |
154 $str = ""; | |
155 } | |
156 if ($id ne $str) { | |
157 print_problem "Different trailing whitespace\n"; | |
158 } | |
159 } | |
160 | |
161 # Check equal numbers of newlines. In general ... etc. | |
162 # | |
163 sub check_num_newlines { | |
164 unless ($opt_n) { return; } | |
165 | |
166 my $num_i = ($msgid =~ m(\\n)g); | |
167 my $num_s = ($msgstr =~ m(\\n)g); | |
168 | |
169 if ($num_i != $num_s) { | |
170 print_problem "Mismatch in newline count\n"; | |
171 } | |
172 | |
173 } | |
174 | |
175 # Check capitalization of first non-whitespace character (for [a-zA-Z] | |
176 # only). In general ... etc. | |
177 # | |
178 sub check_leading_capitalization { | |
179 unless ($opt_c) { return; } | |
180 | |
181 my ($id, $str); | |
182 | |
183 if ($msgid =~ m/^\s*([a-zA-Z])/) { | |
184 $id = $1; | |
185 } | |
186 if ($msgstr =~ m/^\s*([a-zA-Z])/) { | |
187 $str = $1; | |
188 } | |
189 if (defined($id) && defined($str)) { | |
190 if (($id =~ /^[a-z]$/ && $str =~ /^[A-Z]$/) || | |
191 ($id =~ /^[A-Z]$/ && $str =~ /^[a-z]$/)) { | |
192 print_problem "Different leading capitalization\n"; | |
193 } | |
194 } | |
195 } | |
196 | |
197 # Check trailing 'punctuation' characters (ignoring trailing whitespace). | |
198 # In general .. etc. | |
199 # | |
200 sub check_trailing_punctuation { | |
201 unless ($opt_p) { return; } | |
202 | |
203 my ($id, $str); | |
204 | |
205 # Might want more characters: | |
206 if ($msgid =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) { | |
207 $id = $1; | |
208 } else { | |
209 $id = ""; | |
210 } | |
211 if ($msgstr =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) { | |
212 $str = $1; | |
213 } else { | |
214 $str = ""; | |
215 } | |
216 ##print "$id $str\n"; | |
217 if ($id ne $str) { | |
218 print_problem "Different trailing punctuation\n"; | |
219 } | |
220 } | |
221 | |
222 # Check that multiline strings have whitespace separation, since | |
223 # otherwise, eg: | |
224 # msgstr "this is a multiline" | |
225 # "string" | |
226 # expands to: | |
227 # "this is a multilinestring" | |
228 # | |
229 sub check_whitespace_joins { | |
230 if ($opt_x) { return; } | |
231 | |
232 my $ok = 1; | |
233 my $i = 0; | |
234 | |
235 foreach my $aref (\@amsgid, \@amsgstr) { | |
236 my $prev = undef; | |
237 LINE: | |
238 foreach my $line (@$aref) { | |
239 if (defined($prev) | |
240 && length($prev) | |
241 && $prev !~ /\s$/ | |
242 && $prev !~ /\\n$/ | |
243 && $line !~ /^\s/ | |
244 && $line !~ /^\\n/) | |
245 { | |
246 $ok = 0; | |
247 last LINE; | |
248 } | |
249 $prev = $line; | |
250 } | |
251 if (!$ok) { | |
252 print_problem("Possible non-whitespace line-join problem in ", | |
253 ($i==0 ? "msgid" : "msgstr"), " \n"); | |
254 } | |
255 $i++; | |
256 } | |
257 } | |
258 | |
259 # Check printf-style format entries. | |
260 # Non-trivial, because translation strings may use format specifiers | |
261 # out of order, or skip some specifiers etc. Also gettext marks | |
262 # anything with '%' as cformat, though not all are. | |
263 # | |
264 sub check_cformat { | |
265 unless ($is_cformat) { return; } | |
266 if ($opt_x) { return; } | |
267 | |
268 my (@iform, @sform); | |
269 @iform = ($msgid =~ m/\%[0-9\.\$]*[a-z]/g); | |
270 @sform = ($msgstr =~ m/\%[0-9\.\$]*[a-z]/g); | |
271 | |
272 ##print join("::", @iform), "\n"; | |
273 ##print join("::", @sform), "\n"; | |
274 | |
275 my $js; # index in sform | |
276 my $j; # index into iform | |
277 SFORM: | |
278 for ($js=0; $js < @sform; $js++) { | |
279 my $sf = $sform[$js]; | |
280 my $sf_orig = $sf; | |
281 if ($sf =~ s/^\%([0-9]+)\$(.*[a-z])$/\%$2/) { | |
282 $j = $1-1; | |
283 } else { | |
284 $j = $js; | |
285 } | |
286 if ($j > $#iform) { | |
287 print_problem("Format number mismatch for $sf_orig [msgstr:", | |
288 ($js+1), "]\n"); | |
289 next SFORM; | |
290 } | |
291 my $if = $iform[$j]; | |
292 if ($sf ne $if) { | |
293 print_problem("Format mismatch: $sf_orig [msgstr:", ($js+1), "]", | |
294 " vs $if [msgid:", ($j+1), "]\n"); | |
295 } | |
296 } | |
297 } | |
298 | |
299 # Run all individual checks on current entry, reporting any problems. | |
300 sub check_entry { | |
301 if ($is_fuzzy) { | |
302 return; | |
303 } | |
304 $msgid = join("", @amsgid); | |
305 $msgstr = join("", @amsgstr); | |
306 | |
307 unless ($opt_x) { | |
308 if (length($msgid)==0) { | |
309 print_problem "Zero length msgid\n"; | |
310 } | |
311 } | |
312 if (length($msgstr)==0) { | |
313 unless ($opt_e) { return; } | |
314 print_problem "Untranslated msgid\n"; | |
315 } | |
316 check_cformat; | |
317 check_whitespace_joins; | |
318 check_num_newlines; | |
319 check_leading_whitespace; | |
320 check_trailing_newlines; | |
321 check_trailing_whitespace; | |
322 check_leading_capitalization; | |
323 check_trailing_punctuation; | |
324 } | |
325 | |
326 new_entry; | |
327 | |
328 LINE: | |
329 while(<>) { | |
330 if ( m(^\s*$) ) { | |
331 if ($state==S_DOING_MSGSTR) { | |
332 check_entry; | |
333 new_entry; | |
334 } | |
335 next LINE; | |
336 } | |
337 if ( m(^\#, fuzzy) ) { | |
338 $is_fuzzy = 1; | |
339 } | |
340 if ( m(^\#, .*c-format) ) { | |
341 # .* is because can have fuzzy, c-format | |
342 $is_cformat = 1; | |
343 } | |
344 if ( m(^\#) ) { | |
345 next LINE; | |
346 } | |
347 if ( m(^msgid \"(.*)\"$) ) { | |
348 $entryline = $.; | |
349 @amsgid = ($1); | |
350 $state = S_DOING_MSGID; | |
351 next LINE; | |
352 } | |
353 if ( m(^msgid_plural \"(.*)\"$) ) { | |
354 $entryline = $.; | |
355 @amsgid = ($1); | |
356 $state = S_DOING_MSGID; | |
357 next LINE; | |
358 } | |
359 if ( m(^msgstr \"(.*)\"$) ) { | |
360 @amsgstr = ($1); | |
361 $state = S_DOING_MSGSTR; | |
362 next LINE; | |
363 } | |
364 if ( m(^msgstr\[[0-2]\] \"(.*)\"$) ) { | |
365 @amsgstr = ($1); | |
366 $state = S_DOING_MSGSTR; | |
367 next LINE; | |
368 } | |
369 if ( m(^\"(.*)\"$) ) { | |
370 if ($state==S_DOING_MSGID) { | |
371 push @amsgid, $1; | |
372 } elsif($state==S_DOING_MSGSTR) { | |
373 push @amsgstr, $1; | |
374 } else { | |
375 die "Looking at string $_ in bad state $state,"; | |
376 } | |
377 next LINE; | |
378 } | |
379 die "Unexpected at $.: ", $_; | |
380 } |