Mercurial > pidgin.yaz
annotate po/check_po.pl @ 24688:dce57ce0b860
Change the way we handle <span style='font-size: whatever;'> tags slightly.
We really only support a few specific values for that css attribute--we
don't support things like "12pt" for example. But this code was setting
<font size="3"> for any unknown font-size css value. So if someone
specified a css font-size of 72px we set the font size to 3, which is
basically the default font size, which is clearly wrong.
So change this to just do nothing if we don't understand the value.
author | Mark Doliner <mark@kingant.net> |
---|---|
date | Mon, 15 Dec 2008 01:13:17 +0000 |
parents | 72f90ea7ae34 |
children | a675137fc598 |
rev | line source |
---|---|
22015
72f90ea7ae34
Don't assume perl is at /usr/bin/perl, use /usr/bin/env instead. This is not
Richard Laager <rlaager@wiktel.com>
parents:
6238
diff
changeset
|
1 #!/usr/bin/env perl -w |
6238 | 2 # |
3 # check_po.pl - check po file translations for likely errors | |
4 # | |
5 # Written by David W. Pfitzner dwp@mso.anu.edu.au | |
6 # This script is hereby placed in the Public Domain. | |
7 # | |
8 # Various checks on po file translations: | |
9 # - printf-style format strings; | |
10 # - differences in trailing newlines; | |
11 # - empty (non-fuzzy) msgid; | |
12 # - likely whitespace errors on joining multi-line entries | |
13 # Ignores all fuzzy entries. | |
14 # | |
15 # Options: | |
16 # -x Don't do standard checks above (eg, just check one of below). | |
17 # -n Check newlines within strings; ie, that have equal numbers | |
18 # of newlines in msgstr and msgid. (Optional because this may | |
19 # happen legitimately.) | |
20 # -w Check leading whitespace. Sometimes whitespace is simply | |
21 # spacing (eg, for widget labels etc), or punctuation differences, | |
22 # so this may be ok. | |
23 # -W Check trailing whitespace. See -w above. | |
24 # -p Check trailing punctuation. | |
25 # -c Check capitalization of first non-whitespace character | |
26 # (only if [a-zA-Z]). | |
27 # -e Check on empty (c.q. new) msgstr | |
28 # | |
29 # Reads stdin (or filename args, via <>), writes any problems to stdout. | |
30 # | |
31 # Modified by Davide Pagnin nightmare@freeciv.it to support plural forms | |
32 # | |
33 # Version: 0.41 (2002-06-06) | |
34 | |
35 use strict; | |
36 use vars qw($opt_c $opt_n $opt_p $opt_w $opt_W $opt_x $opt_e); | |
37 use Getopt::Std; | |
38 | |
39 getopts('cnpwWxe'); | |
40 | |
41 # Globals, for current po entry: | |
42 # | |
43 # Note that msgid and msgstr have newlines represented by the | |
44 # two characters '\' and 'n' (and similarly for other escapes). | |
45 | |
46 my @amsgid; # lines exactly as in input | |
47 my @amsgstr; | |
48 my $entryline; # lineno where entry starts | |
49 my $msgid; # lines joined by "" | |
50 my $msgstr; | |
51 my $is_fuzzy; | |
52 my $is_cformat; | |
53 my $state; # From constant values below. | |
54 my $did_print; # Whether we have printed this entry, to | |
55 # print only once for multiple problems. | |
56 | |
57 use constant S_LOOKING_START => 0; # looking for start of entry | |
58 use constant S_DOING_MSGID => 1; # doing msgid part | |
59 use constant S_DOING_MSGSTR => 2; # doing msgstr part | |
60 | |
61 # Initialize or reinitalize globals to prepare for new entry: | |
62 sub new_entry { | |
63 @amsgid = (); | |
64 @amsgstr = (); | |
65 $msgid = undef; | |
66 $msgstr = undef; | |
67 $entryline = 0; | |
68 $is_fuzzy = 0; | |
69 $is_cformat = 0; | |
70 $did_print = 0; | |
71 $state = S_LOOKING_START; | |
72 } | |
73 | |
74 # Nicely print either a "msgid" or "msgstr" (name is one of these) | |
75 # with given array of data. | |
76 sub print_one { | |
77 my $name = shift; | |
78 print " $name \"", join("\"\n \"", @_), "\"\n"; | |
79 } | |
80 | |
81 # Print a problem (args like print()), preceeded by entry unless | |
82 # we have already printed that: label, and msgid and msgstr. | |
83 # | |
84 sub print_problem { | |
85 unless ($did_print) { | |
86 print "ENTRY:", ($ARGV eq "-" ? "" : " ($ARGV, line $entryline)"), "\n"; | |
87 print_one("msgid", @amsgid); | |
88 print_one("msgstr", @amsgstr); | |
89 $did_print = 1; | |
90 } | |
91 print "*** ", @_; | |
92 } | |
93 | |
94 # Check final newline: probably, translations should end in a newline | |
95 # if and only if the original string does. | |
96 # (See also check_trailing_whitespace and check_num_newlines below.) | |
97 # | |
98 sub check_trailing_newlines { | |
99 if ($opt_x) { return; } | |
100 | |
101 my ($ichar, $schar); | |
102 | |
103 $ichar = (length($msgid)>=2) ? substr($msgid, -2, 2) : ""; | |
104 $schar = (length($msgstr)>=2) ? substr($msgstr, -2, 2) : ""; | |
105 | |
106 if ($ichar eq "\\n" && $schar ne "\\n") { | |
107 print_problem "Missing trailing newline\n"; | |
108 } | |
109 if ($ichar ne "\\n" && $schar eq "\\n") { | |
110 print_problem "Extra trailing newline\n"; | |
111 } | |
112 | |
113 } | |
114 | |
115 # Check leading whitespace. In general, any leading whitespace should | |
116 # be the same in msgstr and msgid -- but not always. | |
117 # | |
118 sub check_leading_whitespace { | |
119 unless ($opt_w) { return; } | |
120 | |
121 my ($id, $str); | |
122 | |
123 if ($msgid =~ m/^(\s+)/) { | |
124 $id = $1; | |
125 } else { | |
126 $id = ""; | |
127 } | |
128 if ($msgstr =~ m/^(\s+)/) { | |
129 $str = $1; | |
130 } else { | |
131 $str = ""; | |
132 } | |
133 if ($id ne $str) { | |
134 print_problem "Different leading whitespace\n"; | |
135 } | |
136 } | |
137 | |
138 # Check trailing whitespace. In general, any trailing whitespace should | |
139 # be the same in msgstr and msgid -- but not always. | |
140 # | |
141 sub check_trailing_whitespace { | |
142 unless ($opt_W) { return; } | |
143 | |
144 my ($id, $str); | |
145 | |
146 if ($msgid =~ m/((?:\s|\\n)+)$/) { | |
147 $id = $1; | |
148 } else { | |
149 $id = ""; | |
150 } | |
151 if ($msgstr =~ m/((?:\s|\\n)+)$/) { | |
152 $str = $1; | |
153 } else { | |
154 $str = ""; | |
155 } | |
156 if ($id ne $str) { | |
157 print_problem "Different trailing whitespace\n"; | |
158 } | |
159 } | |
160 | |
161 # Check equal numbers of newlines. In general ... etc. | |
162 # | |
163 sub check_num_newlines { | |
164 unless ($opt_n) { return; } | |
165 | |
166 my $num_i = ($msgid =~ m(\\n)g); | |
167 my $num_s = ($msgstr =~ m(\\n)g); | |
168 | |
169 if ($num_i != $num_s) { | |
170 print_problem "Mismatch in newline count\n"; | |
171 } | |
172 | |
173 } | |
174 | |
175 # Check capitalization of first non-whitespace character (for [a-zA-Z] | |
176 # only). In general ... etc. | |
177 # | |
178 sub check_leading_capitalization { | |
179 unless ($opt_c) { return; } | |
180 | |
181 my ($id, $str); | |
182 | |
183 if ($msgid =~ m/^\s*([a-zA-Z])/) { | |
184 $id = $1; | |
185 } | |
186 if ($msgstr =~ m/^\s*([a-zA-Z])/) { | |
187 $str = $1; | |
188 } | |
189 if (defined($id) && defined($str)) { | |
190 if (($id =~ /^[a-z]$/ && $str =~ /^[A-Z]$/) || | |
191 ($id =~ /^[A-Z]$/ && $str =~ /^[a-z]$/)) { | |
192 print_problem "Different leading capitalization\n"; | |
193 } | |
194 } | |
195 } | |
196 | |
197 # Check trailing 'punctuation' characters (ignoring trailing whitespace). | |
198 # In general .. etc. | |
199 # | |
200 sub check_trailing_punctuation { | |
201 unless ($opt_p) { return; } | |
202 | |
203 my ($id, $str); | |
204 | |
205 # Might want more characters: | |
206 if ($msgid =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) { | |
207 $id = $1; | |
208 } else { | |
209 $id = ""; | |
210 } | |
211 if ($msgstr =~ m/([\\\.\/\,\!\?\"\'\:\;])+(?:\s|\\n)*$/) { | |
212 $str = $1; | |
213 } else { | |
214 $str = ""; | |
215 } | |
216 ##print "$id $str\n"; | |
217 if ($id ne $str) { | |
218 print_problem "Different trailing punctuation\n"; | |
219 } | |
220 } | |
221 | |
222 # Check that multiline strings have whitespace separation, since | |
223 # otherwise, eg: | |
224 # msgstr "this is a multiline" | |
225 # "string" | |
226 # expands to: | |
227 # "this is a multilinestring" | |
228 # | |
229 sub check_whitespace_joins { | |
230 if ($opt_x) { return; } | |
231 | |
232 my $ok = 1; | |
233 my $i = 0; | |
234 | |
235 foreach my $aref (\@amsgid, \@amsgstr) { | |
236 my $prev = undef; | |
237 LINE: | |
238 foreach my $line (@$aref) { | |
239 if (defined($prev) | |
240 && length($prev) | |
241 && $prev !~ /\s$/ | |
242 && $prev !~ /\\n$/ | |
243 && $line !~ /^\s/ | |
244 && $line !~ /^\\n/) | |
245 { | |
246 $ok = 0; | |
247 last LINE; | |
248 } | |
249 $prev = $line; | |
250 } | |
251 if (!$ok) { | |
252 print_problem("Possible non-whitespace line-join problem in ", | |
253 ($i==0 ? "msgid" : "msgstr"), " \n"); | |
254 } | |
255 $i++; | |
256 } | |
257 } | |
258 | |
259 # Check printf-style format entries. | |
260 # Non-trivial, because translation strings may use format specifiers | |
261 # out of order, or skip some specifiers etc. Also gettext marks | |
262 # anything with '%' as cformat, though not all are. | |
263 # | |
264 sub check_cformat { | |
265 unless ($is_cformat) { return; } | |
266 if ($opt_x) { return; } | |
267 | |
268 my (@iform, @sform); | |
269 @iform = ($msgid =~ m/\%[0-9\.\$]*[a-z]/g); | |
270 @sform = ($msgstr =~ m/\%[0-9\.\$]*[a-z]/g); | |
271 | |
272 ##print join("::", @iform), "\n"; | |
273 ##print join("::", @sform), "\n"; | |
274 | |
275 my $js; # index in sform | |
276 my $j; # index into iform | |
277 SFORM: | |
278 for ($js=0; $js < @sform; $js++) { | |
279 my $sf = $sform[$js]; | |
280 my $sf_orig = $sf; | |
281 if ($sf =~ s/^\%([0-9]+)\$(.*[a-z])$/\%$2/) { | |
282 $j = $1-1; | |
283 } else { | |
284 $j = $js; | |
285 } | |
286 if ($j > $#iform) { | |
287 print_problem("Format number mismatch for $sf_orig [msgstr:", | |
288 ($js+1), "]\n"); | |
289 next SFORM; | |
290 } | |
291 my $if = $iform[$j]; | |
292 if ($sf ne $if) { | |
293 print_problem("Format mismatch: $sf_orig [msgstr:", ($js+1), "]", | |
294 " vs $if [msgid:", ($j+1), "]\n"); | |
295 } | |
296 } | |
297 } | |
298 | |
299 # Run all individual checks on current entry, reporting any problems. | |
300 sub check_entry { | |
301 if ($is_fuzzy) { | |
302 return; | |
303 } | |
304 $msgid = join("", @amsgid); | |
305 $msgstr = join("", @amsgstr); | |
306 | |
307 unless ($opt_x) { | |
308 if (length($msgid)==0) { | |
309 print_problem "Zero length msgid\n"; | |
310 } | |
311 } | |
312 if (length($msgstr)==0) { | |
313 unless ($opt_e) { return; } | |
314 print_problem "Untranslated msgid\n"; | |
315 } | |
316 check_cformat; | |
317 check_whitespace_joins; | |
318 check_num_newlines; | |
319 check_leading_whitespace; | |
320 check_trailing_newlines; | |
321 check_trailing_whitespace; | |
322 check_leading_capitalization; | |
323 check_trailing_punctuation; | |
324 } | |
325 | |
326 new_entry; | |
327 | |
328 LINE: | |
329 while(<>) { | |
330 if ( m(^\s*$) ) { | |
331 if ($state==S_DOING_MSGSTR) { | |
332 check_entry; | |
333 new_entry; | |
334 } | |
335 next LINE; | |
336 } | |
337 if ( m(^\#, fuzzy) ) { | |
338 $is_fuzzy = 1; | |
339 } | |
340 if ( m(^\#, .*c-format) ) { | |
341 # .* is because can have fuzzy, c-format | |
342 $is_cformat = 1; | |
343 } | |
344 if ( m(^\#) ) { | |
345 next LINE; | |
346 } | |
347 if ( m(^msgid \"(.*)\"$) ) { | |
348 $entryline = $.; | |
349 @amsgid = ($1); | |
350 $state = S_DOING_MSGID; | |
351 next LINE; | |
352 } | |
353 if ( m(^msgid_plural \"(.*)\"$) ) { | |
354 $entryline = $.; | |
355 @amsgid = ($1); | |
356 $state = S_DOING_MSGID; | |
357 next LINE; | |
358 } | |
359 if ( m(^msgstr \"(.*)\"$) ) { | |
360 @amsgstr = ($1); | |
361 $state = S_DOING_MSGSTR; | |
362 next LINE; | |
363 } | |
364 if ( m(^msgstr\[[0-2]\] \"(.*)\"$) ) { | |
365 @amsgstr = ($1); | |
366 $state = S_DOING_MSGSTR; | |
367 next LINE; | |
368 } | |
369 if ( m(^\"(.*)\"$) ) { | |
370 if ($state==S_DOING_MSGID) { | |
371 push @amsgid, $1; | |
372 } elsif($state==S_DOING_MSGSTR) { | |
373 push @amsgstr, $1; | |
374 } else { | |
375 die "Looking at string $_ in bad state $state,"; | |
376 } | |
377 next LINE; | |
378 } | |
379 die "Unexpected at $.: ", $_; | |
380 } |