Mercurial > hgbook
comparison tools/po4a/lib/Locale/Po4a/Xml.pm @ 722:082bb76417f1
Add Po4a 0.37-dev(2009-03-08)
author | Dongsheng Song <dongsheng.song@gmail.com> |
---|---|
date | Thu, 12 Mar 2009 15:43:56 +0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
721:2180358c32c4 | 722:082bb76417f1 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 # Po4a::Xml.pm | |
4 # | |
5 # extract and translate translatable strings from XML documents. | |
6 # | |
7 # This code extracts plain text from tags and attributes from generic | |
8 # XML documents, and it can be used as a base to build modules for | |
9 # XML-based documents. | |
10 # | |
11 # Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com> | |
12 # Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net> | |
13 # | |
14 # This program is free software; you can redistribute it and/or modify | |
15 # it under the terms of the GNU General Public License as published by | |
16 # the Free Software Foundation; either version 2 of the License, or | |
17 # (at your option) any later version. | |
18 # | |
19 # This program is distributed in the hope that it will be useful, | |
20 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 # GNU General Public License for more details. | |
23 # | |
24 # You should have received a copy of the GNU General Public License | |
25 # along with this program; if not, write to the Free Software | |
26 # Foundation, Inc., | |
27 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
28 # | |
29 ######################################################################## | |
30 | |
31 =head1 NAME | |
32 | |
33 Locale::Po4a::Xml - Convert XML documents and derivates from/to PO files | |
34 | |
35 =head1 DESCRIPTION | |
36 | |
37 The po4a (po for anything) project goal is to ease translations (and more | |
38 interestingly, the maintenance of translations) using gettext tools on | |
39 areas where they were not expected like documentation. | |
40 | |
41 Locale::Po4a::Xml is a module to help the translation of XML documents into | |
42 other [human] languages. It can also be used as a base to build modules for | |
43 XML-based documents. | |
44 | |
45 =cut | |
46 | |
47 package Locale::Po4a::Xml; | |
48 | |
49 use 5.006; | |
50 use strict; | |
51 use warnings; | |
52 | |
53 require Exporter; | |
54 use vars qw(@ISA @EXPORT); | |
55 @ISA = qw(Locale::Po4a::TransTractor); | |
56 @EXPORT = qw(new initialize @tag_types); | |
57 | |
58 use Locale::Po4a::TransTractor; | |
59 use Locale::Po4a::Common; | |
60 use Carp qw(croak); | |
61 use File::Basename; | |
62 use File::Spec; | |
63 | |
64 #It will mantain the path from the root tag to the current one | |
65 my @path; | |
66 | |
67 #It will contain a list of external entities and their attached paths | |
68 my %entities; | |
69 | |
70 my @comments; | |
71 | |
72 sub shiftline { | |
73 my $self = shift; | |
74 # call Transtractor's shiftline | |
75 my ($line,$ref) = $self->SUPER::shiftline(); | |
76 return ($line,$ref) if (not defined $line); | |
77 | |
78 for my $k (keys %entities) { | |
79 if ($line =~ m/^(.*?)&$k;(.*)$/s) { | |
80 my ($before, $after) = ($1, $2); | |
81 my $linenum=0; | |
82 my @textentries; | |
83 | |
84 open (my $in, $entities{$k}) | |
85 or croak wrap_mod("po4a::xml", | |
86 dgettext("po4a", "Can't read from %s: %s"), | |
87 $entities{$k}, $!); | |
88 while (defined (my $textline = <$in>)) { | |
89 $linenum++; | |
90 my $textref=$entities{$k}.":$linenum"; | |
91 push @textentries, ($textline,$textref); | |
92 } | |
93 close $in | |
94 or croak wrap_mod("po4a::xml", | |
95 dgettext("po4a", "Can't close %s after reading: %s"), | |
96 $entities{$k}, $!); | |
97 | |
98 push @textentries, ($after, $ref); | |
99 $line = $before.(shift @textentries); | |
100 $ref .= " ".(shift @textentries); | |
101 $self->unshiftline(@textentries); | |
102 } | |
103 } | |
104 | |
105 return ($line,$ref); | |
106 } | |
107 | |
108 sub read { | |
109 my ($self,$filename)=@_; | |
110 push @{$self->{DOCPOD}{infile}}, $filename; | |
111 $self->Locale::Po4a::TransTractor::read($filename); | |
112 } | |
113 | |
114 sub parse { | |
115 my $self=shift; | |
116 map {$self->parse_file($_)} @{$self->{DOCPOD}{infile}}; | |
117 } | |
118 | |
119 # @save_holders is a stack of references to ('paragraph', 'translation', | |
120 # 'sub_translations', 'open', 'close', 'folded_attributes') hashes, where: | |
121 # paragraph is a reference to an array (see paragraph in the | |
122 # treat_content() subroutine) of strings followed by | |
123 # references. It contains the @paragraph array as it was | |
124 # before the processing was interrupted by a tag instroducing | |
125 # a placeholder. | |
126 # translation is the translation of this level up to now | |
127 # sub_translations is a reference to an array of strings containing the | |
128 # translations which must replace the placeholders. | |
129 # open is the tag which opened the placeholder. | |
130 # close is the tag which closed the placeholder. | |
131 # folded_attributes is an hash of tags with their attributes (<tag attrs=...> | |
132 # strings), referenced by the folded tag id, which should | |
133 # replace the <tag po4a-id=id> strings in the current | |
134 # translation. | |
135 # | |
136 # If @save_holders only has 1 holder, then we are not processing the | |
137 # content of an holder, we are translating the document. | |
138 my @save_holders; | |
139 | |
140 | |
141 # If we are at the bottom of the stack and there is no <placeholder ...> in | |
142 # the current translation, we can push the translation in the translated | |
143 # document. | |
144 # Otherwise, we keep the translation in the current holder. | |
145 sub pushline { | |
146 my ($self, $line) = (shift, shift); | |
147 | |
148 my $holder = $save_holders[$#save_holders]; | |
149 my $translation = $holder->{'translation'}; | |
150 $translation .= $line; | |
151 | |
152 while ( %{$holder->{folded_attributes}} | |
153 and $translation =~ m/^(.*)<([^>]+?)\s+po4a-id=([0-9]+)>(.*)$/s) { | |
154 my $begin = $1; | |
155 my $tag = $2; | |
156 my $id = $3; | |
157 my $end = $4; | |
158 if (defined $holder->{folded_attributes}->{$id}) { | |
159 # TODO: check if the tag is the same | |
160 $translation = $begin.$holder->{folded_attributes}->{$id}.$end; | |
161 delete $holder->{folded_attributes}->{$id}; | |
162 } else { | |
163 # TODO: It will be hard to identify the location. | |
164 # => find a way to retrieve the reference. | |
165 die wrap_mod("po4a::xml", dgettext("po4a", "'po4a-id=%d' in the translation does not exist in the original string (or 'po4a-id=%d' used twice in the translation)."), $id, $id); | |
166 } | |
167 } | |
168 # TODO: check that %folded_attributes is empty at some time | |
169 # => in translate_paragraph? | |
170 | |
171 if ( ($#save_holders > 0) | |
172 or ($translation =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s)) { | |
173 $holder->{'translation'} = $translation; | |
174 } else { | |
175 $self->SUPER::pushline($translation); | |
176 $holder->{'translation'} = ''; | |
177 } | |
178 } | |
179 | |
180 =head1 TRANSLATING WITH PO4A::XML | |
181 | |
182 This module can be used directly to handle generic XML documents. This will | |
183 extract all tag's content, and no attributes, since it's where the text is | |
184 written in most XML based documents. | |
185 | |
186 There are some options (described in the next section) that can customize | |
187 this behavior. If this doesn't fit to your document format you're encouraged | |
188 to write your own module derived from this, to describe your format's details. | |
189 See the section "Writing derivate modules" below, for the process description. | |
190 | |
191 =cut | |
192 | |
193 # | |
194 # Parse file and translate it | |
195 # | |
196 sub parse_file { | |
197 my ($self,$filename) = @_; | |
198 my $eof = 0; | |
199 | |
200 while (!$eof) { | |
201 # We get all the text until the next breaking tag (not | |
202 # inline) and translate it | |
203 $eof = $self->treat_content; | |
204 if (!$eof) { | |
205 # And then we treat the following breaking tag | |
206 $eof = $self->treat_tag; | |
207 } | |
208 } | |
209 } | |
210 | |
211 =head1 OPTIONS ACCEPTED BY THIS MODULE | |
212 | |
213 The global debug option causes this module to show the excluded strings, in | |
214 order to see if it skips something important. | |
215 | |
216 These are this module's particular options: | |
217 | |
218 =over 4 | |
219 | |
220 =item B<nostrip> | |
221 | |
222 Prevents it to strip the spaces around the extracted strings. | |
223 | |
224 =item B<wrap> | |
225 | |
226 Canonizes the string to translate, considering that whitespaces are not | |
227 important, and wraps the translated document. This option can be overridden | |
228 by custom tag options. See the "tags" option below. | |
229 | |
230 =item B<caseinsensitive> | |
231 | |
232 It makes the tags and attributes searching to work in a case insensitive | |
233 way. If it's defined, it will treat E<lt>BooKE<gt>laNG and E<lt>BOOKE<gt>Lang as E<lt>bookE<gt>lang. | |
234 | |
235 =item B<includeexternal> | |
236 | |
237 When defined, external entities are included in the generated (translated) | |
238 document, and for the extraction of strings. If it's not defined, you | |
239 will have to translate external entities separately as independent | |
240 documents. | |
241 | |
242 =item B<ontagerror> | |
243 | |
244 This option defines the behavior of the module when it encounter a invalid | |
245 Xml syntax (a closing tag which does not match the last opening tag, or a | |
246 tag's attribute without value). | |
247 It can take the following values: | |
248 | |
249 =over | |
250 | |
251 =item I<fail> | |
252 | |
253 This is the default value. | |
254 The module will exit with an error. | |
255 | |
256 =item I<warn> | |
257 | |
258 The module will continue, and will issue a warning. | |
259 | |
260 =item I<silent> | |
261 | |
262 The module will continue without any warnings. | |
263 | |
264 =back | |
265 | |
266 Be careful when using this option. | |
267 It is generally recommended to fix the input file. | |
268 | |
269 =item B<tagsonly> | |
270 | |
271 Extracts only the specified tags in the "tags" option. Otherwise, it | |
272 will extract all the tags except the ones specified. | |
273 | |
274 Note: This option is deprecated. | |
275 | |
276 =item B<doctype> | |
277 | |
278 String that will try to match with the first line of the document's doctype | |
279 (if defined). If it doesn't, a warning will indicate that the document | |
280 might be of a bad type. | |
281 | |
282 =item B<tags> | |
283 | |
284 Space-separated list of tags you want to translate or skip. By default, | |
285 the specified tags will be excluded, but if you use the "tagsonly" option, | |
286 the specified tags will be the only ones included. The tags must be in the | |
287 form E<lt>aaaE<gt>, but you can join some (E<lt>bbbE<gt>E<lt>aaaE<gt>) to say that the content of | |
288 the tag E<lt>aaaE<gt> will only be translated when it's into a E<lt>bbbE<gt> tag. | |
289 | |
290 You can also specify some tag options putting some characters in front of | |
291 the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap) | |
292 to override the default behavior specified by the global "wrap" option. | |
293 | |
294 Example: WE<lt>chapterE<gt>E<lt>titleE<gt> | |
295 | |
296 Note: This option is deprecated. | |
297 You should use the B<translated> and B<untranslated> options instead. | |
298 | |
299 =item B<attributes> | |
300 | |
301 Space-separated list of tag's attributes you want to translate. You can | |
302 specify the attributes by their name (for example, "lang"), but you can | |
303 prefix it with a tag hierarchy, to specify that this attribute will only be | |
304 translated when it's into the specified tag. For example: E<lt>bbbE<gt>E<lt>aaaE<gt>lang | |
305 specifies that the lang attribute will only be translated if it's into an | |
306 E<lt>aaaE<gt> tag, and it's into a E<lt>bbbE<gt> tag. | |
307 | |
308 =item B<foldattributes> | |
309 | |
310 Do not translate attributes in inline tags. | |
311 Instead, replace all attributes of a tag by po4a-id=<id>. | |
312 | |
313 This is useful when attributes shall not be translated, as this simplifies the | |
314 strings for translators, and avoids typos. | |
315 | |
316 =item B<break> | |
317 | |
318 Space-separated list of tags which should break the sequence. | |
319 By default, all tags break the sequence. | |
320 | |
321 The tags must be in the form <aaa>, but you can join some | |
322 (<bbb><aaa>), if a tag (<aaa>) should only be considered | |
323 when it's into another tag (<bbb>). | |
324 | |
325 =item B<inline> | |
326 | |
327 Space-separated list of tags which should be treated as inline. | |
328 By default, all tags break the sequence. | |
329 | |
330 The tags must be in the form <aaa>, but you can join some | |
331 (<bbb><aaa>), if a tag (<aaa>) should only be considered | |
332 when it's into another tag (<bbb>). | |
333 | |
334 =item B<placeholder> | |
335 | |
336 Space-separated list of tags which should be treated as placeholders. | |
337 Placeholders do not break the sequence, but the content of placeholders is | |
338 translated separately. | |
339 | |
340 The location of the placeholder in its blocks will be marked with a string | |
341 similar to: | |
342 | |
343 <placeholder type=\"footnote\" id=\"0\"/> | |
344 | |
345 The tags must be in the form <aaa>, but you can join some | |
346 (<bbb><aaa>), if a tag (<aaa>) should only be considered | |
347 when it's into another tag (<bbb>). | |
348 | |
349 =item B<nodefault> | |
350 | |
351 Space separated list of tags that the module should not try to set by | |
352 default in any category. | |
353 | |
354 =item B<cpp> | |
355 | |
356 Support C preprocessor directives. | |
357 When this option is set, po4a will consider preprocessor directives as | |
358 paragraph separators. | |
359 This is important if the XML file must be preprocessed because otherwise | |
360 the directives may be inserted in the middle of lines if po4a consider it | |
361 belong to the current paragraph, and they won't be recognized by the | |
362 preprocessor. | |
363 Note: the preprocessor directives must only appear between tags | |
364 (they must not break a tag). | |
365 | |
366 =item B<translated> | |
367 | |
368 Space-separated list of tags you want to translate. | |
369 | |
370 The tags must be in the form <aaa>, but you can join some | |
371 (<bbb><aaa>), if a tag (<aaa>) should only be considered | |
372 when it's into another tag (<bbb>). | |
373 | |
374 You can also specify some tag options putting some characters in front of | |
375 the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap) | |
376 to overide the default behavior specified by the global "wrap" option. | |
377 | |
378 Example: WE<lt>chapterE<gt>E<lt>titleE<gt> | |
379 | |
380 =item B<untranslated> | |
381 | |
382 Space-separated list of tags you do not want to translate. | |
383 | |
384 The tags must be in the form <aaa>, but you can join some | |
385 (<bbb><aaa>), if a tag (<aaa>) should only be considered | |
386 when it's into another tag (<bbb>). | |
387 | |
388 =item B<defaulttranslateoption> | |
389 | |
390 The default categories for tags that are not in any of the translated, | |
391 untranslated, break, inline, or placeholder. | |
392 | |
393 This is a set of letters: | |
394 | |
395 =over | |
396 | |
397 =item I<w> | |
398 | |
399 Tags should be translated and content can be re-wrapped. | |
400 | |
401 =item I<W> | |
402 | |
403 Tags should be translated and content should not be re-wrapped. | |
404 | |
405 =item I<i> | |
406 | |
407 Tags should be translated inline. | |
408 | |
409 =item I<p> | |
410 | |
411 Tags should be translated as placeholders. | |
412 | |
413 =back | |
414 | |
415 =back | |
416 | |
417 =cut | |
418 # TODO: defaulttranslateoption | |
419 # w => indicate that it is only valid for translatable tags and do not | |
420 # care about inline/break/placeholder? | |
421 # ... | |
422 | |
423 sub initialize { | |
424 my $self = shift; | |
425 my %options = @_; | |
426 | |
427 # Reset the path | |
428 @path = (); | |
429 | |
430 # Initialize the stack of holders | |
431 my @paragraph = (); | |
432 my @sub_translations = (); | |
433 my %folded_attributes; | |
434 my %holder = ('paragraph' => \@paragraph, | |
435 'translation' => "", | |
436 'sub_translations' => \@sub_translations, | |
437 'folded_attributes' => \%folded_attributes); | |
438 @save_holders = (\%holder); | |
439 | |
440 $self->{options}{'nostrip'}=0; | |
441 $self->{options}{'wrap'}=0; | |
442 $self->{options}{'caseinsensitive'}=0; | |
443 $self->{options}{'tagsonly'}=0; | |
444 $self->{options}{'tags'}=''; | |
445 $self->{options}{'break'}=''; | |
446 $self->{options}{'translated'}=''; | |
447 $self->{options}{'untranslated'}=''; | |
448 $self->{options}{'defaulttranslateoption'}=''; | |
449 $self->{options}{'attributes'}=''; | |
450 $self->{options}{'foldattributes'}=0; | |
451 $self->{options}{'inline'}=''; | |
452 $self->{options}{'placeholder'}=''; | |
453 $self->{options}{'doctype'}=''; | |
454 $self->{options}{'nodefault'}=''; | |
455 $self->{options}{'includeexternal'}=0; | |
456 $self->{options}{'ontagerror'}="fail"; | |
457 $self->{options}{'cpp'}=0; | |
458 | |
459 $self->{options}{'verbose'}=''; | |
460 $self->{options}{'debug'}=''; | |
461 | |
462 foreach my $opt (keys %options) { | |
463 if ($options{$opt}) { | |
464 die wrap_mod("po4a::xml", | |
465 dgettext("po4a", "Unknown option: %s"), $opt) | |
466 unless exists $self->{options}{$opt}; | |
467 $self->{options}{$opt} = $options{$opt}; | |
468 } | |
469 } | |
470 # Default options set by modules. Forbidden for users. | |
471 $self->{options}{'_default_translated'}=''; | |
472 $self->{options}{'_default_untranslated'}=''; | |
473 $self->{options}{'_default_break'}=''; | |
474 $self->{options}{'_default_inline'}=''; | |
475 $self->{options}{'_default_placeholder'}=''; | |
476 $self->{options}{'_default_attributes'}=''; | |
477 | |
478 #It will maintain the list of the translatable tags | |
479 $self->{tags}=(); | |
480 $self->{translated}=(); | |
481 $self->{untranslated}=(); | |
482 #It will maintain the list of the translatable attributes | |
483 $self->{attributes}=(); | |
484 #It will maintain the list of the breaking tags | |
485 $self->{break}=(); | |
486 #It will maintain the list of the inline tags | |
487 $self->{inline}=(); | |
488 #It will maintain the list of the placeholder tags | |
489 $self->{placeholder}=(); | |
490 #list of the tags that must not be set in the tags or inline category | |
491 #by this module or sub-module (unless specified in an option) | |
492 $self->{nodefault}=(); | |
493 | |
494 $self->treat_options; | |
495 } | |
496 | |
497 =head1 WRITING DERIVATE MODULES | |
498 | |
499 =head2 DEFINE WHAT TAGS AND ATTRIBUTES TO TRANSLATE | |
500 | |
501 The simplest customization is to define which tags and attributes you want | |
502 the parser to translate. This should be done in the initialize function. | |
503 First you should call the main initialize, to get the command-line options, | |
504 and then, append your custom definitions to the options hash. If you want | |
505 to treat some new options from command line, you should define them before | |
506 calling the main initialize: | |
507 | |
508 $self->{options}{'new_option'}=''; | |
509 $self->SUPER::initialize(%options); | |
510 $self->{options}{'_default_translated'}.=' <p> <head><title>'; | |
511 $self->{options}{'attributes'}.=' <p>lang id'; | |
512 $self->{options}{'_default_inline'}.=' <br>'; | |
513 $self->treat_options; | |
514 | |
515 You should use the B<_default_inline>, B<_default_break>, | |
516 B<_default_placeholder>, B<_default_translated>, B<_default_untranslated>, | |
517 and B<_default_attributes> options in derivated modules. This allow users | |
518 to override the default behavior defined in your module with command line | |
519 options. | |
520 | |
521 =head2 OVERRIDING THE found_string FUNCTION | |
522 | |
523 Another simple step is to override the function "found_string", which | |
524 receives the extracted strings from the parser, in order to translate them. | |
525 There you can control which strings you want to translate, and perform | |
526 transformations to them before or after the translation itself. | |
527 | |
528 It receives the extracted text, the reference on where it was, and a hash | |
529 that contains extra information to control what strings to translate, how | |
530 to translate them and to generate the comment. | |
531 | |
532 The content of these options depends on the kind of string it is (specified in an | |
533 entry of this hash): | |
534 | |
535 =over | |
536 | |
537 =item type="tag" | |
538 | |
539 The found string is the content of a translatable tag. The entry "tag_options" | |
540 contains the option characters in front of the tag hierarchy in the module | |
541 "tags" option. | |
542 | |
543 =item type="attribute" | |
544 | |
545 Means that the found string is the value of a translatable attribute. The | |
546 entry "attribute" has the name of the attribute. | |
547 | |
548 =back | |
549 | |
550 It must return the text that will replace the original in the translated | |
551 document. Here's a basic example of this function: | |
552 | |
553 sub found_string { | |
554 my ($self,$text,$ref,$options)=@_; | |
555 $text = $self->translate($text,$ref,"type ".$options->{'type'}, | |
556 'wrap'=>$self->{options}{'wrap'}); | |
557 return $text; | |
558 } | |
559 | |
560 There's another simple example in the new Dia module, which only filters | |
561 some strings. | |
562 | |
563 =cut | |
564 | |
565 sub found_string { | |
566 my ($self,$text,$ref,$options)=@_; | |
567 | |
568 if ($text =~ m/^\s*$/s) { | |
569 return $text; | |
570 } | |
571 | |
572 my $comment; | |
573 my $wrap = $self->{options}{'wrap'}; | |
574 | |
575 if ($options->{'type'} eq "tag") { | |
576 $comment = "Content of: ".$self->get_path; | |
577 | |
578 if($options->{'tag_options'} =~ /w/) { | |
579 $wrap = 1; | |
580 } | |
581 if($options->{'tag_options'} =~ /W/) { | |
582 $wrap = 0; | |
583 } | |
584 } elsif ($options->{'type'} eq "attribute") { | |
585 $comment = "Attribute '".$options->{'attribute'}."' of: ".$self->get_path; | |
586 } elsif ($options->{'type'} eq "CDATA") { | |
587 $comment = "CDATA"; | |
588 $wrap = 0; | |
589 } else { | |
590 die wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Internal error: unknown type identifier '%s'."), $options->{'type'}); | |
591 } | |
592 $text = $self->translate($text,$ref,$comment,'wrap'=>$wrap, comment => $options->{'comments'}); | |
593 return $text; | |
594 } | |
595 | |
596 =head2 MODIFYING TAG TYPES (TODO) | |
597 | |
598 This is a more complex one, but it enables a (almost) total customization. | |
599 It's based in a list of hashes, each one defining a tag type's behavior. The | |
600 list should be sorted so that the most general tags are after the most | |
601 concrete ones (sorted first by the beginning and then by the end keys). To | |
602 define a tag type you'll have to make a hash with the following keys: | |
603 | |
604 =over 4 | |
605 | |
606 =item beginning | |
607 | |
608 Specifies the beginning of the tag, after the "E<lt>". | |
609 | |
610 =item end | |
611 | |
612 Specifies the end of the tag, before the "E<gt>". | |
613 | |
614 =item breaking | |
615 | |
616 It says if this is a breaking tag class. A non-breaking (inline) tag is one | |
617 that can be taken as part of the content of another tag. It can take the | |
618 values false (0), true (1) or undefined. If you leave this undefined, you'll | |
619 have to define the f_breaking function that will say whether a concrete tag of | |
620 this class is a breaking tag or not. | |
621 | |
622 =item f_breaking | |
623 | |
624 It's a function that will tell if the next tag is a breaking one or not. It | |
625 should be defined if the "breaking" option is not. | |
626 | |
627 =item f_extract | |
628 | |
629 If you leave this key undefined, the generic extraction function will have to | |
630 extract the tag itself. It's useful for tags that can have other tags or | |
631 special structures in them, so that the main parser doesn't get mad. This | |
632 function receives a boolean that says if the tag should be removed from the | |
633 input stream or not. | |
634 | |
635 =item f_translate | |
636 | |
637 This function receives the tag (in the get_string_until() format) and returns | |
638 the translated tag (translated attributes or all needed transformations) as a | |
639 single string. | |
640 | |
641 =back | |
642 | |
643 =cut | |
644 | |
645 ##### Generic XML tag types #####' | |
646 | |
647 our @tag_types = ( | |
648 { beginning => "!--#", | |
649 end => "--", | |
650 breaking => 0, | |
651 f_extract => \&tag_extract_comment, | |
652 f_translate => \&tag_trans_comment}, | |
653 { beginning => "!--", | |
654 end => "--", | |
655 breaking => 0, | |
656 f_extract => \&tag_extract_comment, | |
657 f_translate => \&tag_trans_comment}, | |
658 { beginning => "?xml", | |
659 end => "?", | |
660 breaking => 1, | |
661 f_translate => \&tag_trans_xmlhead}, | |
662 { beginning => "?", | |
663 end => "?", | |
664 breaking => 1, | |
665 f_translate => \&tag_trans_procins}, | |
666 { beginning => "!DOCTYPE", | |
667 end => "", | |
668 breaking => 1, | |
669 f_extract => \&tag_extract_doctype, | |
670 f_translate => \&tag_trans_doctype}, | |
671 { beginning => "![CDATA[", | |
672 end => "", | |
673 breaking => 1, | |
674 f_extract => \&CDATA_extract, | |
675 f_translate => \&CDATA_trans}, | |
676 { beginning => "/", | |
677 end => "", | |
678 f_breaking => \&tag_break_close, | |
679 f_translate => \&tag_trans_close}, | |
680 { beginning => "", | |
681 end => "/", | |
682 f_breaking => \&tag_break_alone, | |
683 f_translate => \&tag_trans_alone}, | |
684 { beginning => "", | |
685 end => "", | |
686 f_breaking => \&tag_break_open, | |
687 f_translate => \&tag_trans_open} | |
688 ); | |
689 | |
690 sub tag_extract_comment { | |
691 my ($self,$remove)=(shift,shift); | |
692 my ($eof,@tag)=$self->get_string_until('-->',{include=>1,remove=>$remove}); | |
693 return ($eof,@tag); | |
694 } | |
695 | |
696 sub tag_trans_comment { | |
697 my ($self,@tag)=@_; | |
698 return $self->join_lines(@tag); | |
699 } | |
700 | |
701 sub tag_trans_xmlhead { | |
702 my ($self,@tag)=@_; | |
703 | |
704 # We don't have to translate anything from here: throw away references | |
705 my $tag = $self->join_lines(@tag); | |
706 $tag =~ /encoding=(("|')|)(.*?)(\s|\2)/s; | |
707 my $in_charset=$3; | |
708 $self->detected_charset($in_charset); | |
709 my $out_charset=$self->get_out_charset; | |
710 | |
711 if (defined $in_charset) { | |
712 $tag =~ s/$in_charset/$out_charset/; | |
713 } else { | |
714 if ($tag =~ m/standalone/) { | |
715 $tag =~ s/(standalone)/encoding="$out_charset" $1/; | |
716 } else { | |
717 $tag.= " encoding=\"$out_charset\""; | |
718 } | |
719 } | |
720 | |
721 return $tag; | |
722 } | |
723 | |
724 sub tag_trans_procins { | |
725 my ($self,@tag)=@_; | |
726 return $self->join_lines(@tag); | |
727 } | |
728 | |
729 sub tag_extract_doctype { | |
730 my ($self,$remove)=(shift,shift); | |
731 | |
732 # Check if there is an internal subset (between []). | |
733 my ($eof,@tag)=$self->get_string_until('>',{include=>1,unquoted=>1}); | |
734 my $parity = 0; | |
735 my $paragraph = ""; | |
736 map { $parity = 1 - $parity; $paragraph.= $parity?$_:""; } @tag; | |
737 my $found = 0; | |
738 if ($paragraph =~ m/<.*\[.*</s) { | |
739 $found = 1 | |
740 } | |
741 | |
742 if (not $found) { | |
743 ($eof,@tag)=$self->get_string_until('>',{include=>1,remove=>$remove,unquoted=>1}); | |
744 } else { | |
745 ($eof,@tag)=$self->get_string_until(']\s*>',{include=>1,remove=>$remove,unquoted=>1,regex=>1}); | |
746 } | |
747 return ($eof,@tag); | |
748 } | |
749 | |
750 sub tag_trans_doctype { | |
751 # This check is not really reliable. There are system and public | |
752 # identifiers. Only the public one could be checked reliably. | |
753 my ($self,@tag)=@_; | |
754 if (defined $self->{options}{'doctype'} ) { | |
755 my $doctype = $self->{options}{'doctype'}; | |
756 if ( $tag[0] !~ /\Q$doctype\E/i ) { | |
757 warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Bad document type. '%s' expected. You can fix this warning with a -o doctype option, or ignore this check with -o doctype=\"\"."), $doctype); | |
758 } | |
759 } | |
760 my $i = 0; | |
761 my $basedir = $tag[1]; | |
762 $basedir =~ s/:[0-9]+$//; | |
763 $basedir = dirname($basedir); | |
764 | |
765 while ( $i < $#tag ) { | |
766 my $t = $tag[$i]; | |
767 my $ref = $tag[$i+1]; | |
768 if ( $t =~ /^(\s*<!ENTITY\s+)(.*)$/is ) { | |
769 my $part1 = $1; | |
770 my $part2 = $2; | |
771 my $includenow = 0; | |
772 my $file = 0; | |
773 my $name = ""; | |
774 if ($part2 =~ /^(%\s+)(.*)$/s ) { | |
775 $part1.= $1; | |
776 $part2 = $2; | |
777 $includenow = 1; | |
778 } | |
779 $part2 =~ /^(\S+)(\s+)(.*)$/s; | |
780 $name = $1; | |
781 $part1.= $1.$2; | |
782 $part2 = $3; | |
783 if ( $part2 =~ /^(SYSTEM\s+)(.*)$/is ) { | |
784 $part1.= $1; | |
785 $part2 = $2; | |
786 $file = 1; | |
787 if ($self->{options}{'includeexternal'}) { | |
788 $entities{$name} = $part2; | |
789 $entities{$name} =~ s/^"?(.*?)".*$/$1/s; | |
790 $entities{$name} = File::Spec->catfile($basedir, $entities{$name}); | |
791 } | |
792 } | |
793 if ((not $file) and (not $includenow)) { | |
794 if ($part2 =~ m/^\s*(["'])(.*)\1(\s*>.*)$/s) { | |
795 my $comment = "Content of the $name entity"; | |
796 my $quote = $1; | |
797 my $text = $2; | |
798 $part2 = $3; | |
799 $text = $self->translate($text, | |
800 $ref, | |
801 $comment, | |
802 'wrap'=>1); | |
803 $t = $part1."$quote$text$quote$part2"; | |
804 } | |
805 } | |
806 # print $part1."\n"; | |
807 # print $name."\n"; | |
808 # print $part2."\n"; | |
809 } | |
810 $tag[$i] = $t; | |
811 $i += 2; | |
812 } | |
813 return $self->join_lines(@tag); | |
814 } | |
815 | |
816 sub tag_break_close { | |
817 my ($self,@tag)=@_; | |
818 my $struct = $self->get_path; | |
819 my $options = $self->get_translate_options($struct); | |
820 if ($options =~ m/[ip]/) { | |
821 return 0; | |
822 } else { | |
823 return 1; | |
824 } | |
825 } | |
826 | |
827 sub tag_trans_close { | |
828 my ($self,@tag)=@_; | |
829 my $name = $self->get_tag_name(@tag); | |
830 | |
831 my $test = pop @path; | |
832 if (!defined($test) || $test ne $name ) { | |
833 my $ontagerror = $self->{options}{'ontagerror'}; | |
834 if ($ontagerror eq "warn") { | |
835 warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing..."), $name); | |
836 } elsif ($ontagerror ne "silent") { | |
837 die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name); | |
838 } | |
839 } | |
840 return $self->join_lines(@tag); | |
841 } | |
842 | |
843 sub CDATA_extract { | |
844 my ($self,$remove)=(shift,shift); | |
845 my ($eof, @tag) = $self->get_string_until(']]>',{include=>1,unquoted=>0,remove=>$remove}); | |
846 | |
847 return ($eof, @tag); | |
848 } | |
849 | |
850 sub CDATA_trans { | |
851 my ($self,@tag)=@_; | |
852 return $self->found_string($self->join_lines(@tag), | |
853 $tag[1], | |
854 {'type' => "CDATA"}); | |
855 } | |
856 | |
857 sub tag_break_alone { | |
858 my ($self,@tag)=@_; | |
859 my $struct = $self->get_path($self->get_tag_name(@tag)); | |
860 if ($self->get_translate_options($struct) =~ m/i/) { | |
861 return 0; | |
862 } else { | |
863 return 1; | |
864 } | |
865 } | |
866 | |
867 sub tag_trans_alone { | |
868 my ($self,@tag)=@_; | |
869 my $name = $self->get_tag_name(@tag); | |
870 push @path, $name; | |
871 | |
872 $name = $self->treat_attributes(@tag); | |
873 | |
874 pop @path; | |
875 return $name; | |
876 } | |
877 | |
878 sub tag_break_open { | |
879 my ($self,@tag)=@_; | |
880 my $struct = $self->get_path($self->get_tag_name(@tag)); | |
881 my $options = $self->get_translate_options($struct); | |
882 if ($options =~ m/[ip]/) { | |
883 return 0; | |
884 } else { | |
885 return 1; | |
886 } | |
887 } | |
888 | |
889 sub tag_trans_open { | |
890 my ($self,@tag)=@_; | |
891 my $name = $self->get_tag_name(@tag); | |
892 push @path, $name; | |
893 | |
894 $name = $self->treat_attributes(@tag); | |
895 | |
896 return $name; | |
897 } | |
898 | |
899 ##### END of Generic XML tag types ##### | |
900 | |
901 =head1 INTERNAL FUNCTIONS used to write derivated parsers | |
902 | |
903 =head2 WORKING WITH TAGS | |
904 | |
905 =over 4 | |
906 | |
907 =item get_path() | |
908 | |
909 This function returns the path to the current tag from the document's root, | |
910 in the form E<lt>htmlE<gt>E<lt>bodyE<gt>E<lt>pE<gt>. | |
911 | |
912 An additional array of tags (without brackets) can be passed in argument. | |
913 These path elements are added to the end of the current path. | |
914 | |
915 =cut | |
916 | |
917 sub get_path { | |
918 my $self = shift; | |
919 my @add = @_; | |
920 if ( @path > 0 or @add > 0 ) { | |
921 return "<".join("><",@path,@add).">"; | |
922 } else { | |
923 return "outside any tag (error?)"; | |
924 } | |
925 } | |
926 | |
927 =item tag_type() | |
928 | |
929 This function returns the index from the tag_types list that fits to the next | |
930 tag in the input stream, or -1 if it's at the end of the input file. | |
931 | |
932 =cut | |
933 | |
934 sub tag_type { | |
935 my $self = shift; | |
936 my ($line,$ref) = $self->shiftline(); | |
937 my ($match1,$match2); | |
938 my $found = 0; | |
939 my $i = 0; | |
940 | |
941 if (!defined($line)) { return -1; } | |
942 | |
943 $self->unshiftline($line,$ref); | |
944 my ($eof,@lines) = $self->get_string_until(">",{include=>1,unquoted=>1}); | |
945 my $line2 = $self->join_lines(@lines); | |
946 while (!$found && $i < @tag_types) { | |
947 ($match1,$match2) = ($tag_types[$i]->{beginning},$tag_types[$i]->{end}); | |
948 if ($line =~ /^<\Q$match1\E/) { | |
949 if (!defined($tag_types[$i]->{f_extract})) { | |
950 #print substr($line2,length($line2)-1-length($match2),1+length($match2))."\n"; | |
951 if (defined($line2) and $line2 =~ /\Q$match2\E>$/) { | |
952 $found = 1; | |
953 #print "YES: <".$match1." ".$match2.">\n"; | |
954 } else { | |
955 #print "NO: <".$match1." ".$match2.">\n"; | |
956 $i++; | |
957 } | |
958 } else { | |
959 $found = 1; | |
960 } | |
961 } else { | |
962 $i++; | |
963 } | |
964 } | |
965 if (!$found) { | |
966 #It should never enter here, unless you undefine the most | |
967 #general tags (as <...>) | |
968 die "po4a::xml: Unknown tag type: ".$line."\n"; | |
969 } else { | |
970 return $i; | |
971 } | |
972 } | |
973 | |
974 =item extract_tag($$) | |
975 | |
976 This function returns the next tag from the input stream without the beginning | |
977 and end, in an array form, to maintain the references from the input file. It | |
978 has two parameters: the type of the tag (as returned by tag_type) and a | |
979 boolean, that indicates if it should be removed from the input stream. | |
980 | |
981 =cut | |
982 | |
983 sub extract_tag { | |
984 my ($self,$type,$remove) = (shift,shift,shift); | |
985 my ($match1,$match2) = ($tag_types[$type]->{beginning},$tag_types[$type]->{end}); | |
986 my ($eof,@tag); | |
987 if (defined($tag_types[$type]->{f_extract})) { | |
988 ($eof,@tag) = &{$tag_types[$type]->{f_extract}}($self,$remove); | |
989 } else { | |
990 ($eof,@tag) = $self->get_string_until($match2.">",{include=>1,remove=>$remove,unquoted=>1}); | |
991 } | |
992 $tag[0] =~ /^<\Q$match1\E(.*)$/s; | |
993 $tag[0] = $1; | |
994 $tag[$#tag-1] =~ /^(.*)\Q$match2\E>$/s; | |
995 $tag[$#tag-1] = $1; | |
996 return ($eof,@tag); | |
997 } | |
998 | |
999 =item get_tag_name(@) | |
1000 | |
1001 This function returns the name of the tag passed as an argument, in the array | |
1002 form returned by extract_tag. | |
1003 | |
1004 =cut | |
1005 | |
1006 sub get_tag_name { | |
1007 my ($self,@tag)=@_; | |
1008 $tag[0] =~ /^(\S*)/; | |
1009 return $1; | |
1010 } | |
1011 | |
1012 =item breaking_tag() | |
1013 | |
1014 This function returns a boolean that says if the next tag in the input stream | |
1015 is a breaking tag or not (inline tag). It leaves the input stream intact. | |
1016 | |
1017 =cut | |
1018 | |
1019 sub breaking_tag { | |
1020 my $self = shift; | |
1021 my $break; | |
1022 | |
1023 my $type = $self->tag_type; | |
1024 if ($type == -1) { return 0; } | |
1025 | |
1026 #print "TAG TYPE = ".$type."\n"; | |
1027 $break = $tag_types[$type]->{breaking}; | |
1028 if (!defined($break)) { | |
1029 # This tag's breaking depends on its content | |
1030 my ($eof,@lines) = $self->extract_tag($type,0); | |
1031 $break = &{$tag_types[$type]->{f_breaking}}($self,@lines); | |
1032 } | |
1033 #print "break = ".$break."\n"; | |
1034 return $break; | |
1035 } | |
1036 | |
1037 =item treat_tag() | |
1038 | |
1039 This function translates the next tag from the input stream. Using each | |
1040 tag type's custom translation functions. | |
1041 | |
1042 =cut | |
1043 | |
1044 sub treat_tag { | |
1045 my $self = shift; | |
1046 my $type = $self->tag_type; | |
1047 | |
1048 my ($match1,$match2) = ($tag_types[$type]->{beginning},$tag_types[$type]->{end}); | |
1049 my ($eof,@lines) = $self->extract_tag($type,1); | |
1050 | |
1051 $lines[0] =~ /^(\s*)(.*)$/s; | |
1052 my $space1 = $1; | |
1053 $lines[0] = $2; | |
1054 $lines[$#lines-1] =~ /^(.*?)(\s*)$/s; | |
1055 my $space2 = $2; | |
1056 $lines[$#lines-1] = $1; | |
1057 | |
1058 # Calling this tag type's specific handling (translation of | |
1059 # attributes...) | |
1060 my $line = &{$tag_types[$type]->{f_translate}}($self,@lines); | |
1061 $self->pushline("<".$match1.$space1.$line.$space2.$match2.">"); | |
1062 return $eof; | |
1063 } | |
1064 | |
1065 =item tag_in_list($@) | |
1066 | |
1067 This function returns a string value that says if the first argument (a tag | |
1068 hierarchy) matches any of the tags from the second argument (a list of tags | |
1069 or tag hierarchies). If it doesn't match, it returns 0. Else, it returns the | |
1070 matched tag's options (the characters in front of the tag) or 1 (if that tag | |
1071 doesn't have options). | |
1072 | |
1073 =back | |
1074 | |
1075 =cut | |
1076 sub tag_in_list ($$$) { | |
1077 my ($self,$path,$list) = @_; | |
1078 if ($self->{options}{'caseinsensitive'}) { | |
1079 $path = lc $path; | |
1080 } | |
1081 | |
1082 while (1) { | |
1083 if (defined $list->{$path}) { | |
1084 if (length $list->{$path}) { | |
1085 return $list->{$path}; | |
1086 } else { | |
1087 return 1; | |
1088 } | |
1089 } | |
1090 last unless ($path =~ m/</); | |
1091 $path =~ s/^<.*?>//; | |
1092 } | |
1093 | |
1094 return 0; | |
1095 } | |
1096 | |
1097 =head2 WORKING WITH ATTRIBUTES | |
1098 | |
1099 =over 4 | |
1100 | |
1101 =item treat_attributes(@) | |
1102 | |
1103 This function handles the translation of the tags' attributes. It receives the tag | |
1104 without the beginning / end marks, and then it finds the attributes, and it | |
1105 translates the translatable ones (specified by the module option "attributes"). | |
1106 This returns a plain string with the translated tag. | |
1107 | |
1108 =back | |
1109 | |
1110 =cut | |
1111 | |
1112 sub treat_attributes { | |
1113 my ($self,@tag)=@_; | |
1114 | |
1115 $tag[0] =~ /^(\S*)(.*)/s; | |
1116 my $text = $1; | |
1117 $tag[0] = $2; | |
1118 | |
1119 while (@tag) { | |
1120 my $complete = 1; | |
1121 | |
1122 $text .= $self->skip_spaces(\@tag); | |
1123 if (@tag) { | |
1124 # Get the attribute's name | |
1125 $complete = 0; | |
1126 | |
1127 $tag[0] =~ /^([^\s=]+)(.*)/s; | |
1128 my $name = $1; | |
1129 my $ref = $tag[1]; | |
1130 $tag[0] = $2; | |
1131 $text .= $name; | |
1132 $text .= $self->skip_spaces(\@tag); | |
1133 if (@tag) { | |
1134 # Get the '=' | |
1135 if ($tag[0] =~ /^=(.*)/s) { | |
1136 $tag[0] = $1; | |
1137 $text .= "="; | |
1138 $text .= $self->skip_spaces(\@tag); | |
1139 if (@tag) { | |
1140 # Get the value | |
1141 my $value=""; | |
1142 $ref=$tag[1]; | |
1143 my $quot=substr($tag[0],0,1); | |
1144 if ($quot ne "\"" and $quot ne "'") { | |
1145 # Unquoted value | |
1146 $quot=""; | |
1147 $tag[0] =~ /^(\S+)(.*)/s; | |
1148 $value = $1; | |
1149 $tag[0] = $2; | |
1150 } else { | |
1151 # Quoted value | |
1152 $text .= $quot; | |
1153 $tag[0] =~ /^\Q$quot\E(.*)/s; | |
1154 $tag[0] = $1; | |
1155 while ($tag[0] !~ /\Q$quot\E/) { | |
1156 $value .= $tag[0]; | |
1157 shift @tag; | |
1158 shift @tag; | |
1159 } | |
1160 $tag[0] =~ /^(.*?)\Q$quot\E(.*)/s; | |
1161 $value .= $1; | |
1162 $tag[0] = $2; | |
1163 } | |
1164 $complete = 1; | |
1165 if ($self->tag_in_list($self->get_path.$name,$self->{attributes})) { | |
1166 $text .= $self->found_string($value, $ref, { type=>"attribute", attribute=>$name }); | |
1167 } else { | |
1168 print wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Content of attribute %s excluded: %s"), $self->get_path.$name, $value) | |
1169 if $self->debug(); | |
1170 $text .= $self->recode_skipped_text($value); | |
1171 } | |
1172 $text .= $quot; | |
1173 } | |
1174 } | |
1175 } | |
1176 | |
1177 unless ($complete) { | |
1178 my $ontagerror = $self->{options}{'ontagerror'}; | |
1179 if ($ontagerror eq "warn") { | |
1180 warn wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax. Continuing...")); | |
1181 } elsif ($ontagerror ne "silent") { | |
1182 die wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax")); | |
1183 } | |
1184 } | |
1185 } | |
1186 } | |
1187 return $text; | |
1188 } | |
1189 | |
1190 # Returns an empty string if the content in the $path should not be | |
1191 # translated. | |
1192 # | |
1193 # Otherwise, returns the set of options for translation: | |
1194 # w: the content shall be re-wrapped | |
1195 # W: the content shall not be re-wrapped | |
1196 # i: the tag shall be inlined | |
1197 # p: a placeholder shall replace the tag (and its content) | |
1198 # | |
1199 # A translatable inline tag in an untranslated tag is treated as a translatable breaking tag. | |
1200 my %translate_options_cache; | |
1201 sub get_translate_options { | |
1202 my $self = shift; | |
1203 my $path = shift; | |
1204 | |
1205 if (defined $translate_options_cache{$path}) { | |
1206 return $translate_options_cache{$path}; | |
1207 } | |
1208 | |
1209 my $options = ""; | |
1210 my $translate = 0; | |
1211 my $usedefault = 1; | |
1212 | |
1213 my $inlist = 0; | |
1214 my $tag = $self->get_tag_from_list($path, $self->{tags}); | |
1215 if (defined $tag) { | |
1216 $inlist = 1; | |
1217 } | |
1218 if ($self->{options}{'tagsonly'} eq $inlist) { | |
1219 $usedefault = 0; | |
1220 if (defined $tag) { | |
1221 $options = $tag; | |
1222 $options =~ s/<.*$//; | |
1223 } else { | |
1224 if ($self->{options}{'wrap'}) { | |
1225 $options = "w"; | |
1226 } else { | |
1227 $options = "W"; | |
1228 } | |
1229 } | |
1230 $translate = 1; | |
1231 } | |
1232 | |
1233 # TODO: a less precise set of tags should not override a more precise one | |
1234 # The tags and tagsonly options are deprecated. | |
1235 # The translated and untranslated options have an higher priority. | |
1236 $tag = $self->get_tag_from_list($path, $self->{translated}); | |
1237 if (defined $tag) { | |
1238 $usedefault = 0; | |
1239 $options = $tag; | |
1240 $options =~ s/<.*$//; | |
1241 $translate = 1; | |
1242 } | |
1243 | |
1244 if ($translate and $options !~ m/w/i) { | |
1245 $options .= ($self->{options}{'wrap'})?"w":"W"; | |
1246 } | |
1247 | |
1248 if (not defined $tag) { | |
1249 $tag = $self->get_tag_from_list($path, $self->{untranslated}); | |
1250 if (defined $tag) { | |
1251 $usedefault = 0; | |
1252 $options = ""; | |
1253 $translate = 0; | |
1254 } | |
1255 } | |
1256 | |
1257 $tag = $self->get_tag_from_list($path, $self->{inline}); | |
1258 if (defined $tag) { | |
1259 $usedefault = 0; | |
1260 $options .= "i"; | |
1261 } else { | |
1262 $tag = $self->get_tag_from_list($path, $self->{placeholder}); | |
1263 if (defined $tag) { | |
1264 $usedefault = 0; | |
1265 $options .= "p"; | |
1266 } | |
1267 } | |
1268 | |
1269 if ($usedefault) { | |
1270 $options = $self->{options}{'defaulttranslateoption'}; | |
1271 } | |
1272 | |
1273 # A translatable inline tag in an untranslated tag is treated as a | |
1274 # translatable breaking tag. | |
1275 if ($options =~ m/i/) { | |
1276 my $ppath = $path; | |
1277 $ppath =~ s/<[^>]*>$//; | |
1278 my $poptions = $self->get_translate_options ($ppath); | |
1279 if ($poptions eq "") { | |
1280 $options =~ s/i//; | |
1281 } | |
1282 } | |
1283 | |
1284 if ($options =~ m/i/ and $self->{options}{'foldattributes'}) { | |
1285 $options .= "f"; | |
1286 } | |
1287 | |
1288 $translate_options_cache{$path} = $options; | |
1289 return $options; | |
1290 } | |
1291 | |
1292 | |
1293 # Return the tag (or biggest set of tags) of a list which matches with the | |
1294 # given path. | |
1295 # | |
1296 # The tag (or set of tags) is returned with its options. | |
1297 # | |
1298 # If no tags could match the path, undef is returned. | |
1299 sub get_tag_from_list ($$$) { | |
1300 my ($self,$path,$list) = @_; | |
1301 if ($self->{options}{'caseinsensitive'}) { | |
1302 $path = lc $path; | |
1303 } | |
1304 | |
1305 while (1) { | |
1306 if (defined $list->{$path}) { | |
1307 return $list->{$path}.$path; | |
1308 } | |
1309 last unless ($path =~ m/</); | |
1310 $path =~ s/^<.*?>//; | |
1311 } | |
1312 | |
1313 return undef; | |
1314 } | |
1315 | |
1316 | |
1317 | |
1318 sub treat_content { | |
1319 my $self = shift; | |
1320 my $blank=""; | |
1321 # Indicates if the paragraph will have to be translated | |
1322 my $translate = ""; | |
1323 | |
1324 my ($eof,@paragraph)=$self->get_string_until('<',{remove=>1}); | |
1325 | |
1326 while (!$eof and !$self->breaking_tag) { | |
1327 NEXT_TAG: | |
1328 my @text; | |
1329 my $type = $self->tag_type; | |
1330 my $f_extract = $tag_types[$type]->{'f_extract'}; | |
1331 if ( defined($f_extract) | |
1332 and $f_extract eq \&tag_extract_comment) { | |
1333 # Remove the content of the comments | |
1334 ($eof, @text) = $self->extract_tag($type,1); | |
1335 $text[$#text-1] .= "\0"; | |
1336 if ($tag_types[$type]->{'beginning'} eq "!--#") { | |
1337 $text[0] = "#".$text[0]; | |
1338 } | |
1339 push @comments, @text; | |
1340 } else { | |
1341 my ($tmpeof, @tag) = $self->extract_tag($type,0); | |
1342 # Append the found inline tag | |
1343 ($eof,@text)=$self->get_string_until('>', | |
1344 {include=>1, | |
1345 remove=>1, | |
1346 unquoted=>1}); | |
1347 # Append or remove the opening/closing tag from | |
1348 # the tag path | |
1349 if ($tag_types[$type]->{'end'} eq "") { | |
1350 if ($tag_types[$type]->{'beginning'} eq "") { | |
1351 # Opening inline tag | |
1352 my $cur_tag_name = $self->get_tag_name(@tag); | |
1353 my $t_opts = $self->get_translate_options($self->get_path($cur_tag_name)); | |
1354 if ($t_opts =~ m/p/) { | |
1355 # We enter a new holder. | |
1356 # Append a <placeholder ...> tag to the current | |
1357 # paragraph, and save the @paragraph in the | |
1358 # current holder. | |
1359 my $last_holder = $save_holders[$#save_holders]; | |
1360 my $placeholder_str = "<placeholder type=\"".$cur_tag_name."\" id=\"".($#{$last_holder->{'sub_translations'}}+1)."\"/>"; | |
1361 push @paragraph, ($placeholder_str, $text[1]); | |
1362 my @saved_paragraph = @paragraph; | |
1363 | |
1364 $last_holder->{'paragraph'} = \@saved_paragraph; | |
1365 | |
1366 # Then we must push a new holder | |
1367 my @new_paragraph = (); | |
1368 my @sub_translations = (); | |
1369 my %folded_attributes; | |
1370 my %new_holder = ('paragraph' => \@new_paragraph, | |
1371 'open' => $text[0], | |
1372 'translation' => "", | |
1373 'close' => undef, | |
1374 'sub_translations' => \@sub_translations, | |
1375 'folded_attributes' => \%folded_attributes); | |
1376 push @save_holders, \%new_holder; | |
1377 @text = (); | |
1378 | |
1379 # The current @paragraph | |
1380 # (for the current holder) | |
1381 # is empty. | |
1382 @paragraph = (); | |
1383 } elsif ($t_opts =~ m/f/) { | |
1384 my $tag_full = $self->join_lines(@text); | |
1385 my $tag_ref = $text[1]; | |
1386 if ($tag_full =~ m/^<\s*\S+\s+\S.*>$/s) { | |
1387 my $holder = $save_holders[$#save_holders]; | |
1388 my $id = 0; | |
1389 foreach (keys %{$holder->{folded_attributes}}) { | |
1390 $id = $_ + 1 if ($_ >= $id); | |
1391 } | |
1392 $holder->{folded_attributes}->{$id} = $tag_full; | |
1393 | |
1394 @text = ("<$cur_tag_name po4a-id=$id>", $tag_ref); | |
1395 } | |
1396 } | |
1397 push @path, $cur_tag_name; | |
1398 } elsif ($tag_types[$type]->{'beginning'} eq "/") { | |
1399 # Closing inline tag | |
1400 | |
1401 # Check if this is closing the | |
1402 # last opening tag we detected. | |
1403 my $test = pop @path; | |
1404 my $name = $self->get_tag_name(@tag); | |
1405 if (!defined($test) || | |
1406 $test ne $name ) { | |
1407 my $ontagerror = $self->{options}{'ontagerror'}; | |
1408 if ($ontagerror eq "warn") { | |
1409 warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing..."), $name); | |
1410 } elsif ($ontagerror ne "silent") { | |
1411 die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name); | |
1412 } | |
1413 } | |
1414 | |
1415 if ($self->get_translate_options($self->get_path($self->get_tag_name(@tag))) =~ m/p/) { | |
1416 # This closes the current holder. | |
1417 | |
1418 push @path, $self->get_tag_name(@tag); | |
1419 # Now translate this paragraph if needed. | |
1420 # This will call pushline and append the | |
1421 # translation to the current holder's translation. | |
1422 $self->translate_paragraph(@paragraph); | |
1423 pop @path; | |
1424 | |
1425 # Now that this holder is closed, we can remove | |
1426 # the holder from the stack. | |
1427 my $holder = pop @save_holders; | |
1428 # We need to keep the translation of this holder | |
1429 my $translation = $holder->{'open'}.$holder->{'translation'}.$text[0]; | |
1430 # FIXME: @text could be multilines. | |
1431 | |
1432 @text = (); | |
1433 | |
1434 # Then we store the translation in the previous | |
1435 # holder's sub_translations array | |
1436 my $previous_holder = $save_holders[$#save_holders]; | |
1437 push @{$previous_holder->{'sub_translations'}}, $translation; | |
1438 # We also need to restore the @paragraph array, as | |
1439 # it was before we encountered the holder. | |
1440 @paragraph = @{$previous_holder->{'paragraph'}}; | |
1441 } | |
1442 } | |
1443 } | |
1444 push @paragraph, @text; | |
1445 } | |
1446 | |
1447 # Next tag | |
1448 ($eof,@text)=$self->get_string_until('<',{remove=>1}); | |
1449 if ($#text > 0) { | |
1450 # Check if text (extracted after the inline tag) | |
1451 # has to be translated | |
1452 push @paragraph, @text; | |
1453 } | |
1454 } | |
1455 | |
1456 # This strips the extracted strings | |
1457 # (only if you don't specify the 'nostrip' option, and if the | |
1458 # paragraph can be re-wrapped) | |
1459 $translate = $self->get_translate_options($self->get_path); | |
1460 if (!$self->{options}{'nostrip'} and $translate !~ m/W/) { | |
1461 my $clean = 0; | |
1462 # Clean the beginning | |
1463 while (!$clean and $#paragraph > 0) { | |
1464 $paragraph[0] =~ /^(\s*)(.*)/s; | |
1465 my $match = $1; | |
1466 if ($paragraph[0] eq $match) { | |
1467 if ($match ne "") { | |
1468 $self->pushline($match); | |
1469 } | |
1470 shift @paragraph; | |
1471 shift @paragraph; | |
1472 } else { | |
1473 $paragraph[0] = $2; | |
1474 if ($match ne "") { | |
1475 $self->pushline($match); | |
1476 } | |
1477 $clean = 1; | |
1478 } | |
1479 } | |
1480 $clean = 0; | |
1481 # Clean the end | |
1482 while (!$clean and $#paragraph > 0) { | |
1483 $paragraph[$#paragraph-1] =~ /^(.*?)(\s*)$/s; | |
1484 my $match = $2; | |
1485 if ($paragraph[$#paragraph-1] eq $match) { | |
1486 if ($match ne "") { | |
1487 $blank = $match.$blank; | |
1488 } | |
1489 pop @paragraph; | |
1490 pop @paragraph; | |
1491 } else { | |
1492 $paragraph[$#paragraph-1] = $1; | |
1493 if ($match ne "") { | |
1494 $blank = $match.$blank; | |
1495 } | |
1496 $clean = 1; | |
1497 } | |
1498 } | |
1499 } | |
1500 | |
1501 # Translate the string when needed | |
1502 # This will either push the translation in the translated document or | |
1503 # in the current holder translation. | |
1504 $self->translate_paragraph(@paragraph); | |
1505 | |
1506 # Push the trailing blanks | |
1507 if ($blank ne "") { | |
1508 $self->pushline($blank); | |
1509 } | |
1510 return $eof; | |
1511 } | |
1512 | |
1513 # Translate a @paragraph array of (string, reference). | |
1514 # The $translate argument indicates if the strings must be translated or | |
1515 # just pushed | |
1516 sub translate_paragraph { | |
1517 my $self = shift; | |
1518 my @paragraph = @_; | |
1519 my $translate = $self->get_translate_options($self->get_path); | |
1520 | |
1521 while ( (scalar @paragraph) | |
1522 and ($paragraph[0] =~ m/^\s*\n/s)) { | |
1523 $self->pushline($paragraph[0]); | |
1524 shift @paragraph; | |
1525 shift @paragraph; | |
1526 } | |
1527 | |
1528 my $comments; | |
1529 while (@comments) { | |
1530 my ($comment,$eoc); | |
1531 do { | |
1532 my ($t,$l) = (shift @comments, shift @comments); | |
1533 $t =~ s/\n?(\0)?$//; | |
1534 $eoc = $1; | |
1535 $comment .= "\n" if defined $comment; | |
1536 $comment .= $t; | |
1537 } until ($eoc); | |
1538 $comments .= "\n" if defined $comments; | |
1539 $comments .= $comment; | |
1540 $self->pushline("<!--".$comment."-->\n") if defined $comment; | |
1541 } | |
1542 @comments = (); | |
1543 | |
1544 if ($self->{options}{'cpp'}) { | |
1545 my @tmp = @paragraph; | |
1546 @paragraph = (); | |
1547 while (@tmp) { | |
1548 my ($t,$l) = (shift @tmp, shift @tmp); | |
1549 # #include can be followed by a filename between | |
1550 # <> brackets. In that case, the argument won't be | |
1551 # handled in the same call to translate_paragraph. | |
1552 # Thus do not try to match "include ". | |
1553 if ($t =~ m/^#[ \t]*(if |endif|undef |include|else|ifdef |ifndef |define )/si) { | |
1554 if (@paragraph) { | |
1555 $self->translate_paragraph(@paragraph); | |
1556 @paragraph = (); | |
1557 $self->pushline("\n"); | |
1558 } | |
1559 $self->pushline($t); | |
1560 } else { | |
1561 push @paragraph, ($t,$l); | |
1562 } | |
1563 } | |
1564 } | |
1565 | |
1566 my $para = $self->join_lines(@paragraph); | |
1567 if ( length($para) > 0 ) { | |
1568 if ($translate ne "") { | |
1569 # This tag should be translated | |
1570 $self->pushline($self->found_string( | |
1571 $para, | |
1572 $paragraph[1], { | |
1573 type=>"tag", | |
1574 tag_options=>$translate, | |
1575 comments=>$comments | |
1576 })); | |
1577 } else { | |
1578 # Inform that this tag isn't translated in debug mode | |
1579 print wrap_ref_mod($paragraph[1], "po4a::xml", dgettext ("po4a", "Content of tag %s excluded: %s"), $self->get_path, $para) | |
1580 if $self->debug(); | |
1581 $self->pushline($self->recode_skipped_text($para)); | |
1582 } | |
1583 } | |
1584 # Now the paragraph is fully translated. | |
1585 # If we have all the holders' translation, we can replace the | |
1586 # placeholders by their translations. | |
1587 # We must wait to have all the translations because the holders are | |
1588 # numbered. | |
1589 { | |
1590 my $holder = $save_holders[$#save_holders]; | |
1591 my $translation = $holder->{'translation'}; | |
1592 | |
1593 # Count the number of <placeholder ...> in $translation | |
1594 my $count = 0; | |
1595 my $str = $translation; | |
1596 while ( (defined $str) | |
1597 and ($str =~ m/^.*?<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s)) { | |
1598 $count += 1; | |
1599 $str = $2; | |
1600 if ($holder->{'sub_translations'}->[$1] =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s) { | |
1601 $count = -1; | |
1602 last; | |
1603 } | |
1604 } | |
1605 | |
1606 if ( (defined $translation) | |
1607 and (scalar(@{$holder->{'sub_translations'}}) == $count)) { | |
1608 # OK, all the holders of the current paragraph are | |
1609 # closed (and translated). | |
1610 # Replace them by their translation. | |
1611 while ($translation =~ m/^(.*?)<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s) { | |
1612 # FIXME: we could also check that | |
1613 # * the holder exists | |
1614 # * all the holders are used | |
1615 $translation = $1.$holder->{'sub_translations'}->[$2].$3; | |
1616 } | |
1617 # We have our translation | |
1618 $holder->{'translation'} = $translation; | |
1619 # And there is no need for any holder in it. | |
1620 my @sub_translations = (); | |
1621 $holder->{'sub_translations'} = \@sub_translations; | |
1622 } | |
1623 } | |
1624 | |
1625 } | |
1626 | |
1627 | |
1628 | |
1629 =head2 WORKING WITH THE MODULE OPTIONS | |
1630 | |
1631 =over 4 | |
1632 | |
1633 =item treat_options() | |
1634 | |
1635 This function fills the internal structures that contain the tags, attributes | |
1636 and inline data with the options of the module (specified in the command-line | |
1637 or in the initialize function). | |
1638 | |
1639 =back | |
1640 | |
1641 =cut | |
1642 | |
1643 sub treat_options { | |
1644 my $self = shift; | |
1645 | |
1646 if ($self->{options}{'caseinsensitive'}) { | |
1647 $self->{options}{'nodefault'} = lc $self->{options}{'nodefault'}; | |
1648 $self->{options}{'tags'} = lc $self->{options}{'tags'}; | |
1649 $self->{options}{'break'} = lc $self->{options}{'break'}; | |
1650 $self->{options}{'_default_break'} = lc $self->{options}{'_default_break'}; | |
1651 $self->{options}{'translated'} = lc $self->{options}{'translated'}; | |
1652 $self->{options}{'_default_translated'} = lc $self->{options}{'_default_translated'}; | |
1653 $self->{options}{'untranslated'} = lc $self->{options}{'untranslated'}; | |
1654 $self->{options}{'_default_untranslated'} = lc $self->{options}{'_default_untranslated'}; | |
1655 $self->{options}{'attributes'} = lc $self->{options}{'attributes'}; | |
1656 $self->{options}{'_default_attributes'} = lc $self->{options}{'_default_attributes'}; | |
1657 $self->{options}{'inline'} = lc $self->{options}{'inline'}; | |
1658 $self->{options}{'_default_inline'} = lc $self->{options}{'_default_inline'}; | |
1659 $self->{options}{'placeholder'} = lc $self->{options}{'placeholder'}; | |
1660 $self->{options}{'_default_placeholder'} = lc $self->{options}{'_default_placeholder'}; | |
1661 } | |
1662 | |
1663 $self->{options}{'nodefault'} =~ /^\s*(.*)\s*$/s; | |
1664 my %list_nodefault; | |
1665 foreach (split(/\s+/s,$1)) { | |
1666 $list_nodefault{$_} = 1; | |
1667 } | |
1668 $self->{nodefault} = \%list_nodefault; | |
1669 | |
1670 $self->{options}{'tags'} =~ /^\s*(.*)\s*$/s; | |
1671 if (length $self->{options}{'tags'}) { | |
1672 warn wrap_mod("po4a::xml", | |
1673 dgettext("po4a", | |
1674 "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tags"); | |
1675 } | |
1676 foreach (split(/\s+/s,$1)) { | |
1677 $_ =~ m/^(.*?)(<.*)$/; | |
1678 $self->{tags}->{$2} = $1 || ""; | |
1679 } | |
1680 | |
1681 if ($self->{options}{'tagsonly'}) { | |
1682 warn wrap_mod("po4a::xml", | |
1683 dgettext("po4a", | |
1684 "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tagsonly"); | |
1685 } | |
1686 | |
1687 $self->{options}{'break'} =~ /^\s*(.*)\s*$/s; | |
1688 foreach my $tag (split(/\s+/s,$1)) { | |
1689 $tag =~ m/^(.*?)(<.*)$/; | |
1690 $self->{break}->{$2} = $1 || ""; | |
1691 } | |
1692 $self->{options}{'_default_break'} =~ /^\s*(.*)\s*$/s; | |
1693 foreach my $tag (split(/\s+/s,$1)) { | |
1694 $tag =~ m/^(.*?)(<.*)$/; | |
1695 $self->{break}->{$2} = $1 || "" | |
1696 unless $list_nodefault{$2} | |
1697 or defined $self->{break}->{$2}; | |
1698 } | |
1699 | |
1700 $self->{options}{'translated'} =~ /^\s*(.*)\s*$/s; | |
1701 foreach my $tag (split(/\s+/s,$1)) { | |
1702 $tag =~ m/^(.*?)(<.*)$/; | |
1703 $self->{translated}->{$2} = $1 || ""; | |
1704 } | |
1705 $self->{options}{'_default_translated'} =~ /^\s*(.*)\s*$/s; | |
1706 foreach my $tag (split(/\s+/s,$1)) { | |
1707 $tag =~ m/^(.*?)(<.*)$/; | |
1708 $self->{translated}->{$2} = $1 || "" | |
1709 unless $list_nodefault{$2} | |
1710 or defined $self->{translated}->{$2}; | |
1711 } | |
1712 | |
1713 $self->{options}{'untranslated'} =~ /^\s*(.*)\s*$/s; | |
1714 foreach my $tag (split(/\s+/s,$1)) { | |
1715 $tag =~ m/^(.*?)(<.*)$/; | |
1716 $self->{untranslated}->{$2} = $1 || ""; | |
1717 } | |
1718 $self->{options}{'_default_untranslated'} =~ /^\s*(.*)\s*$/s; | |
1719 foreach my $tag (split(/\s+/s,$1)) { | |
1720 $tag =~ m/^(.*?)(<.*)$/; | |
1721 $self->{untranslated}->{$2} = $1 || "" | |
1722 unless $list_nodefault{$2} | |
1723 or defined $self->{untranslated}->{$2}; | |
1724 } | |
1725 | |
1726 $self->{options}{'attributes'} =~ /^\s*(.*)\s*$/s; | |
1727 foreach my $tag (split(/\s+/s,$1)) { | |
1728 if ($tag =~ m/^(.*?)(<.*)$/) { | |
1729 $self->{attributes}->{$2} = $1 || ""; | |
1730 } else { | |
1731 $self->{attributes}->{$tag} = ""; | |
1732 } | |
1733 } | |
1734 $self->{options}{'_default_attributes'} =~ /^\s*(.*)\s*$/s; | |
1735 foreach my $tag (split(/\s+/s,$1)) { | |
1736 if ($tag =~ m/^(.*?)(<.*)$/) { | |
1737 $self->{attributes}->{$2} = $1 || "" | |
1738 unless $list_nodefault{$2} | |
1739 or defined $self->{attributes}->{$2}; | |
1740 } else { | |
1741 $self->{attributes}->{$tag} = "" | |
1742 unless $list_nodefault{$tag} | |
1743 or defined $self->{attributes}->{$tag}; | |
1744 } | |
1745 } | |
1746 | |
1747 my @list_inline; | |
1748 $self->{options}{'inline'} =~ /^\s*(.*)\s*$/s; | |
1749 foreach my $tag (split(/\s+/s,$1)) { | |
1750 $tag =~ m/^(.*?)(<.*)$/; | |
1751 $self->{inline}->{$2} = $1 || ""; | |
1752 } | |
1753 $self->{options}{'_default_inline'} =~ /^\s*(.*)\s*$/s; | |
1754 foreach my $tag (split(/\s+/s,$1)) { | |
1755 $tag =~ m/^(.*?)(<.*)$/; | |
1756 $self->{inline}->{$2} = $1 || "" | |
1757 unless $list_nodefault{$2} | |
1758 or defined $self->{inline}->{$2}; | |
1759 } | |
1760 | |
1761 $self->{options}{'placeholder'} =~ /^\s*(.*)\s*$/s; | |
1762 foreach my $tag (split(/\s+/s,$1)) { | |
1763 $tag =~ m/^(.*?)(<.*)$/; | |
1764 $self->{placeholder}->{$2} = $1 || ""; | |
1765 } | |
1766 $self->{options}{'_default_placeholder'} =~ /^\s*(.*)\s*$/s; | |
1767 foreach my $tag (split(/\s+/s,$1)) { | |
1768 $tag =~ m/^(.*?)(<.*)$/; | |
1769 $self->{placeholder}->{$2} = $1 || "" | |
1770 unless $list_nodefault{$2} | |
1771 or defined $self->{placeholder}->{$2}; | |
1772 } | |
1773 | |
1774 # There should be no translated and untranslated tags | |
1775 foreach my $tag (keys %{$self->{translated}}) { | |
1776 die wrap_mod("po4a::xml", | |
1777 dgettext("po4a", | |
1778 "Tag '%s' both in the %s and %s categories."), $tag, "translated", "untranslated") | |
1779 if defined $self->{untranslated}->{$tag}; | |
1780 } | |
1781 # There should be no inline, break, and placeholder tags | |
1782 foreach my $tag (keys %{$self->{inline}}) { | |
1783 die wrap_mod("po4a::xml", | |
1784 dgettext("po4a", | |
1785 "Tag '%s' both in the %s and %s categories."), $tag, "inline", "break") | |
1786 if defined $self->{break}->{$tag}; | |
1787 die wrap_mod("po4a::xml", | |
1788 dgettext("po4a", | |
1789 "Tag '%s' both in the %s and %s categories."), $tag, "inline", "placeholder") | |
1790 if defined $self->{placeholder}->{$tag}; | |
1791 } | |
1792 foreach my $tag (keys %{$self->{break}}) { | |
1793 die wrap_mod("po4a::xml", | |
1794 dgettext("po4a", | |
1795 "Tag '%s' both in the %s and %s categories."), $tag, "break", "placeholder") | |
1796 if defined $self->{placeholder}->{$tag}; | |
1797 } | |
1798 } | |
1799 | |
1800 =head2 GETTING TEXT FROM THE INPUT DOCUMENT | |
1801 | |
1802 =over | |
1803 | |
1804 =item get_string_until($%) | |
1805 | |
1806 This function returns an array with the lines (and references) from the input | |
1807 document until it finds the first argument. The second argument is an options | |
1808 hash. Value 0 means disabled (the default) and 1, enabled. | |
1809 | |
1810 The valid options are: | |
1811 | |
1812 =over 4 | |
1813 | |
1814 =item include | |
1815 | |
1816 This makes the returned array to contain the searched text | |
1817 | |
1818 =item remove | |
1819 | |
1820 This removes the returned stream from the input | |
1821 | |
1822 =item unquoted | |
1823 | |
1824 This ensures that the searched text is outside any quotes | |
1825 | |
1826 =back | |
1827 | |
1828 =cut | |
1829 | |
1830 sub get_string_until { | |
1831 my ($self,$search) = (shift,shift); | |
1832 my $options = shift; | |
1833 my ($include,$remove,$unquoted, $regex) = (0,0,0,0); | |
1834 | |
1835 if (defined($options->{include})) { $include = $options->{include}; } | |
1836 if (defined($options->{remove})) { $remove = $options->{remove}; } | |
1837 if (defined($options->{unquoted})) { $unquoted = $options->{unquoted}; } | |
1838 if (defined($options->{regex})) { $regex = $options->{regex}; } | |
1839 | |
1840 my ($line,$ref) = $self->shiftline(); | |
1841 my (@text,$paragraph); | |
1842 my ($eof,$found) = (0,0); | |
1843 | |
1844 $search = "\Q$search\E" unless $regex; | |
1845 while (defined($line) and !$found) { | |
1846 push @text, ($line,$ref); | |
1847 $paragraph .= $line; | |
1848 if ($unquoted) { | |
1849 if ( $paragraph =~ /^((\".*?\")|(\'.*?\')|[^\"\'])*$search/s ) { | |
1850 $found = 1; | |
1851 } | |
1852 } else { | |
1853 if ( $paragraph =~ /$search/s ) { | |
1854 $found = 1; | |
1855 } | |
1856 } | |
1857 if (!$found) { | |
1858 ($line,$ref)=$self->shiftline(); | |
1859 } | |
1860 } | |
1861 | |
1862 if (!defined($line)) { $eof = 1; } | |
1863 | |
1864 if ( $found ) { | |
1865 $line = ""; | |
1866 if($unquoted) { | |
1867 $paragraph =~ /^(?:(?:\".*?\")|(?:\'.*?\')|[^\"\'])*?$search(.*)$/s; | |
1868 $line = $1; | |
1869 $text[$#text-1] =~ s/\Q$line\E$//s; | |
1870 } else { | |
1871 $paragraph =~ /$search(.*)$/s; | |
1872 $line = $1; | |
1873 $text[$#text-1] =~ s/\Q$line\E$//s; | |
1874 } | |
1875 if(!$include) { | |
1876 $text[$#text-1] =~ /^(.*)($search.*)$/s; | |
1877 $text[$#text-1] = $1; | |
1878 $line = $2.$line; | |
1879 } | |
1880 if (defined($line) and ($line ne "")) { | |
1881 $self->unshiftline ($line,$text[$#text]); | |
1882 } | |
1883 } | |
1884 if (!$remove) { | |
1885 $self->unshiftline (@text); | |
1886 } | |
1887 | |
1888 #If we get to the end of the file, we return the whole paragraph | |
1889 return ($eof,@text); | |
1890 } | |
1891 | |
1892 =item skip_spaces(\@) | |
1893 | |
1894 This function receives as argument the reference to a paragraph (in the format | |
1895 returned by get_string_until), skips his heading spaces and returns them as | |
1896 a simple string. | |
1897 | |
1898 =cut | |
1899 | |
1900 sub skip_spaces { | |
1901 my ($self,$pstring)=@_; | |
1902 my $space=""; | |
1903 | |
1904 while (@$pstring and (@$pstring[0] =~ /^(\s+)(.*)$/s or @$pstring[0] eq "")) { | |
1905 if (@$pstring[0] ne "") { | |
1906 $space .= $1; | |
1907 @$pstring[0] = $2; | |
1908 } | |
1909 | |
1910 if (@$pstring[0] eq "") { | |
1911 shift @$pstring; | |
1912 shift @$pstring; | |
1913 } | |
1914 } | |
1915 return $space; | |
1916 } | |
1917 | |
1918 =item join_lines(@) | |
1919 | |
1920 This function returns a simple string with the text from the argument array | |
1921 (discarding the references). | |
1922 | |
1923 =cut | |
1924 | |
1925 sub join_lines { | |
1926 my ($self,@lines)=@_; | |
1927 my ($line,$ref); | |
1928 my $text = ""; | |
1929 while ($#lines > 0) { | |
1930 ($line,$ref) = (shift @lines,shift @lines); | |
1931 $text .= $line; | |
1932 } | |
1933 return $text; | |
1934 } | |
1935 | |
1936 =back | |
1937 | |
1938 =head1 STATUS OF THIS MODULE | |
1939 | |
1940 This module can translate tags and attributes. | |
1941 | |
1942 =head1 TODO LIST | |
1943 | |
1944 DOCTYPE (ENTITIES) | |
1945 | |
1946 There is a minimal support for the translation of entities. They are | |
1947 translated as a whole, and tags are not taken into account. Multilines | |
1948 entities are not supported and entities are always rewrapped during the | |
1949 translation. | |
1950 | |
1951 MODIFY TAG TYPES FROM INHERITED MODULES | |
1952 (move the tag_types structure inside the $self hash?) | |
1953 | |
1954 =head1 SEE ALSO | |
1955 | |
1956 L<po4a(7)|po4a.7>, L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>. | |
1957 | |
1958 =head1 AUTHORS | |
1959 | |
1960 Jordi Vilalta <jvprat@gmail.com> | |
1961 Nicolas François <nicolas.francois@centraliens.net> | |
1962 | |
1963 =head1 COPYRIGHT AND LICENSE | |
1964 | |
1965 Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com> | |
1966 Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net> | |
1967 | |
1968 This program is free software; you may redistribute it and/or modify it | |
1969 under the terms of GPL (see the COPYING file). | |
1970 | |
1971 =cut | |
1972 | |
1973 1; |