Mercurial > hgbook
changeset 722:082bb76417f1
Add Po4a 0.37-dev(2009-03-08)
author | Dongsheng Song <dongsheng.song@gmail.com> |
---|---|
date | Thu, 12 Mar 2009 15:43:56 +0800 |
parents | 2180358c32c4 |
children | 3c5e1c03cc3e |
files | .hgignore tools/po4a/lib/Locale/Po4a/Chooser.pm tools/po4a/lib/Locale/Po4a/Common.pm tools/po4a/lib/Locale/Po4a/Docbook.pm tools/po4a/lib/Locale/Po4a/Po.pm tools/po4a/lib/Locale/Po4a/TransTractor.pm tools/po4a/lib/Locale/Po4a/Xml.pm tools/po4a/po4a-translate tools/po4a/po4a-updatepo |
diffstat | 9 files changed, 7581 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/.hgignore Thu Mar 12 15:40:40 2009 +0800 +++ b/.hgignore Thu Mar 12 15:43:56 2009 +0800 @@ -1,4 +1,5 @@ -[^/]+/htdocs/ +^htdocs/ +^tools/fop/ syntax: glob
# File: tools/po4a/lib/Locale/Po4a/Chooser.pm (added by this changeset)
#
# Locale::Po4a::Chooser -- Manage po4a modules.
# $Id: Chooser.pm,v 1.41 2008-07-20 16:31:55 nekral-guest Exp $
#
# This program is free software; you may redistribute it and/or modify it
# under the terms of GPL (see COPYING).
#
# This module maps a format name (e.g. "docbook") to the matching
# Locale::Po4a::* module, loads that module on demand and instantiates it.

############################################################################
# Modules and declarations
############################################################################


package Locale::Po4a::Chooser;

use 5.006;
use strict;
use warnings;
use Locale::Po4a::Common;

# Format names whose module name is not simply ucfirst() of the format name.
my %MODNAME_FOR = (
    kernelhelp => 'KernelHelp',
    newsdebian => 'NewsDebian',
    latex      => 'LaTeX',
    bibtex     => 'BibTex',
    tex        => 'TeX',
);

# new($module, %options)
#
# Loads Locale::Po4a::<Module> for the format name $module (if it is not
# loaded yet) and returns the result of calling its new(%options).
#
# Dies when $module is undefined.  When the module cannot be loaded, warns
# "Unknown format type", additionally warns the loading error when
# $options{verbose} > 0, then calls list(1), which exits.
sub new {
    my ($module) = shift;
    my (%options) = @_;

    die wrap_mod("po4a::chooser", gettext("Need to provide a module name"))
      unless defined $module;

    my $modname = exists $MODNAME_FOR{$module}
                ? $MODNAME_FOR{$module}
                : ucfirst($module);

    if (! UNIVERSAL::can("Locale::Po4a::$modname", 'new')) {
        my $error;

        # The name is interpolated into a string eval below, so it must be
        # a plain (possibly ::-separated) identifier.  Anything else is
        # rejected here instead of being compiled as Perl code.
        if ($modname =~ /\A[A-Za-z0-9_]+(?:::[A-Za-z0-9_]+)*\z/) {
            eval qq{use Locale::Po4a::$modname};
            $error = $@;
        } else {
            $error = "invalid module name 'Locale::Po4a::$modname'";
        }

        if ($error) {
            warn wrap_msg(gettext("Unknown format type: %s."), $module);
            warn wrap_mod("po4a::chooser",
                          gettext("Module loading error: %s"), $error)
              if defined $options{'verbose'} && $options{'verbose'} > 0;
            list(1);
        }
    }
    return "Locale::Po4a::$modname"->new(%options);
}

# list($status)
#
# Warns (on STDERR) the list of valid formats, then exits the program with
# the exit status given as first argument.
sub list {
    warn wrap_msg(gettext("List of valid formats:")
#        ."\n - ".gettext("bibtex: BibTex bibliography format.")
        ."\n - ".gettext("dia: uncompressed Dia diagrams.")
        ."\n - ".gettext("docbook: Docbook XML.")
        ."\n - ".gettext("guide: Gentoo Linux's xml documentation format.")
#        ."\n - ".gettext("html: HTML documents (EXPERIMENTAL).")
        ."\n - ".gettext("ini: .INI format.")
        ."\n - ".gettext("kernelhelp: Help messages of each kernel compilation option.")
        ."\n - ".gettext("latex: LaTeX format.")
        ."\n - ".gettext("man: Good old manual page format.")
        ."\n - ".gettext("pod: Perl Online Documentation format.")
        ."\n - ".gettext("sgml: either debiandoc or docbook DTD.")
        ."\n - ".gettext("texinfo: The info page format.")
        ."\n - ".gettext("tex: generic TeX documents (see also latex).")
        ."\n - ".gettext("text: simple text document.")
        ."\n - ".gettext("wml: WML documents.")
        ."\n - ".gettext("xhtml: XHTML documents.")
        ."\n - ".gettext("xml: generic XML documents (see also docbook).")
    );
    exit shift;
}
##############################################################################
# Module return value and documentation
##############################################################################

1;
__END__

=head1 NAME

Locale::Po4a::Chooser - Manage po4a modules

=head1 DESCRIPTION

Locale::Po4a::Chooser is a module to manage po4a modules. Before, all po4a
binaries used to know all po4a modules (pod, man, sgml, etc). This made
adding a new module tedious: one had to make sure the documentation was
synchronized in all modules, and that each of them could access the new
module.

Now, you just have to call the Locale::Po4a::Chooser::new() function,
passing the name of the module as argument.

You also have the Locale::Po4a::Chooser::list() function which lists the
available formats and exits with the value passed as argument.

=head1 SEE ALSO

=over 4

=item About po4a:

L<po4a(7)|po4a.7>,
L<Locale::Po4a::TransTractor(3pm)>,
L<Locale::Po4a::Po(3pm)>

=item About modules:

L<Locale::Po4a::Dia(3pm)>,
L<Locale::Po4a::Docbook(3pm)>,
L<Locale::Po4a::Guide(3pm)>,
L<Locale::Po4a::Halibut(3pm)>,
L<Locale::Po4a::Ini(3pm)>,
L<Locale::Po4a::KernelHelp(3pm)>,
L<Locale::Po4a::LaTeX(3pm)>,
L<Locale::Po4a::Man(3pm)>,
L<Locale::Po4a::Pod(3pm)>,
L<Locale::Po4a::Sgml(3pm)>,
L<Locale::Po4a::TeX(3pm)>,
L<Locale::Po4a::Texinfo(3pm)>,
L<Locale::Po4a::Text(3pm)>,
L<Locale::Po4a::Wml(3pm)>,
L<Locale::Po4a::Xhtml(3pm)>,
L<Locale::Po4a::Xml(3pm)>.

=back

=head1 AUTHORS

 Denis Barbier <barbier@linuxfr.org>
 Martin Quinson (mquinson#debian.org)

=head1 COPYRIGHT AND LICENSE

Copyright 2002,2003,2004,2005 by SPI, inc.

This program is free software; you may redistribute it and/or modify it
under the terms of GPL (see the COPYING file).

=cut
# File: tools/po4a/lib/Locale/Po4a/Common.pm (added by this changeset)
#
# Locale::Po4a::Common -- Common parts of the po4a scripts and utils
# $Id: Common.pm,v 1.20 2009-02-13 23:16:44 nekral-guest Exp $
#
# Copyright 2005 by Jordi Vilalta <jvprat@gmail.com>
#
# This program is free software; you may redistribute it and/or modify it
# under the terms of GPL (see COPYING).
#
# This module has common utilities for the various scripts of po4a

=head1 NAME

Locale::Po4a::Common - Common parts of the po4a scripts and utils

=head1 DESCRIPTION

Locale::Po4a::Common contains common parts of the po4a scripts and some useful
functions used along the other modules.

In order to use Locale::Po4a programmatically, one may want to disable
the use of Text::WrapI18N, by writing e.g.

    use Locale::Po4a::Common qw(nowrapi18n);
    use Locale::Po4a::Text;

instead of:

    use Locale::Po4a::Text;

Ordering is important here: as most Locale::Po4a modules themselves
load Locale::Po4a::Common, the first time this module is loaded
determines whether Text::WrapI18N is used.

=cut

package Locale::Po4a::Common;

require Exporter;
use vars qw(@ISA @EXPORT);
@ISA = qw(Exporter);
@EXPORT = qw(wrap_msg wrap_mod wrap_ref_mod textdomain gettext dgettext);

use 5.006;
use strict;
use warnings;

# import($class, [ 'nowrapi18n', ] @symbols)
#
# Custom import hook.  An optional leading 'nowrapi18n' argument is consumed
# here and disables the use of Text::WrapI18N; the remaining arguments are
# handed to Exporter.  The first call also defines the internal wrapi18n()
# helper (either a real wrapper or a plain concatenation); later calls leave
# that definition untouched.
sub import {
    my $class=shift;

    my $wrapi18n=1;
    if (exists $_[0] && defined $_[0] && $_[0] eq 'nowrapi18n') {
        shift;
        $wrapi18n=0;
    }
    $class->export_to_level(1, $class, @_);

    # wrapi18n() is defined only once: the first import decides.
    return if defined &wrapi18n;

    if ($wrapi18n && -t STDERR && -t STDOUT && eval { require Text::WrapI18N }) {

        # Don't bother determining the wrap column if we cannot wrap.
        my $col=$ENV{COLUMNS};
        if (!defined $col) {
            my @term=eval "use Term::ReadKey; Term::ReadKey::GetTerminalSize()";
            $col=$term[0] if (!$@);
            # If GetTerminalSize() failed we will fallback to a safe default.
            # This can happen if Term::ReadKey is not available
            # or this is a terminal-less build or such strange condition.
        }
        $col=76 if (!defined $col);

        # String eval: the lexical $col is visible inside the evaluated code.
        eval ' use Text::WrapI18N qw($columns);
               $columns = $col;
             ';

        eval ' sub wrapi18n($$$) { Text::WrapI18N::wrap($_[0],$_[1],$_[2]) } '
    } else {

        # If we cannot wrap, well, that's too bad. Survive anyway.
        eval ' sub wrapi18n($$$) { $_[0].$_[2] } '
    }
}

# Returns the numerically smaller of its two arguments.
sub min($$) {
    return $_[0] < $_[1] ? $_[0] : $_[1];
}

=head1 FUNCTIONS

=head2 Showing output messages

=over

=item show_version($)

Shows the current version of the script, and a short copyright message. It
takes the name of the script as an argument.

=cut

sub show_version {
    my $name = shift;

    print sprintf(gettext(
        "%s version %s.\n".
        "written by Martin Quinson and Denis Barbier.\n\n".
        "Copyright (C) 2002, 2003, 2004 Software of Public Interest, Inc.\n".
        "This is free software; see source code for copying\n".
        "conditions. There is NO warranty; not even for\n".
        "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
        ), $name, $Locale::Po4a::TransTractor::VERSION)."\n";
}

=item wrap_msg($@)

This function formats a message the same way as sprintf() does, but wraps
the result so that it looks nice on the terminal. The formatted message,
followed by a newline, is returned.

=cut

sub wrap_msg($@) {
    my $msg = shift;
    my @args = @_;

    return wrapi18n("", "", sprintf($msg, @args))."\n";
}

=item wrap_mod($$@)

This function works like wrap_msg(), but it takes a module name as the first
argument, and leaves a space at the left of the message.

=cut

sub wrap_mod($$@) {
    my ($mod, $msg) = (shift, shift);
    my @args = @_;

    $mod .= ": ";
    # Continuation lines are indented by the prefix width, capped at 15.
    my $spaces = " " x min(length($mod), 15);
    return wrapi18n($mod, $spaces, sprintf($msg, @args))."\n";
}

=item wrap_ref_mod($$$@)

This function works like wrap_msg(), but it takes a file:line reference as the
first argument, a module name as the second one, and leaves a space at the left
of the message.

=back

=cut

sub wrap_ref_mod($$$@) {
    my ($ref, $mod, $msg) = (shift, shift, shift);
    my @args = @_;

    if (!$mod) {
        # If we don't get a module name, show the message like wrap_mod does
        return wrap_mod($ref, $msg, @args);
    } else {
        $ref .= ": ";
        my $spaces = " " x min(length($ref), 15);
        # Only $msg is a format string.  The reference and the module name
        # are literal text and are therefore prepended after formatting, so
        # that a '%' in a file name is not interpreted by sprintf().
        my $text = "$ref($mod)\n" . sprintf($msg, @args);
        return wrapi18n("", $spaces, $text)."\n";
    }
}

=head2 Wrappers for other modules

=over

=item Locale::Gettext

When the Locale::Gettext module cannot be loaded, this module provides dummy
(empty) implementations of the following functions. In that case, po4a
messages won't get translated but the program will continue to work.

If Locale::gettext is present, this wrapper also calls
setlocale(LC_MESSAGES, "") so callers don't depend on the POSIX module
either.

=over

=item bindtextdomain($$)

=item textdomain($)

=item gettext($)

=item dgettext($$)

=back

=back

=cut

BEGIN {
    if (eval { require Locale::gettext }) {
        # Pull the gettext functions into this package so that they can be
        # re-exported through @EXPORT.
        Locale::gettext->import;
        require POSIX;
        POSIX::setlocale(&POSIX::LC_MESSAGES, '');
    } else {
        # No Locale::gettext: define pass-through replacements.
        eval '
            sub bindtextdomain($$) { }
            sub textdomain($) { }
            sub gettext($) { shift }
            sub dgettext($$) { return $_[1] }
        '
    }
}

1;
__END__

=head1 AUTHORS

 Jordi Vilalta <jvprat@gmail.com>

=head1 COPYRIGHT AND LICENSE

Copyright 2005 by SPI, inc.

This program is free software; you may redistribute it and/or modify it
under the terms of GPL (see the COPYING file).

=cut
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/po4a/lib/Locale/Po4a/Docbook.pm Thu Mar 12 15:43:56 2009 +0800 @@ -0,0 +1,2040 @@ +#!/usr/bin/perl +# aptitude: cmdsynopsis => missing removal of leading spaces + +# Po4a::Docbook.pm +# +# extract and translate translatable strings from Docbook XML documents. +# +# This code extracts plain text from tags and attributes on Docbook XML +# documents. +# +# Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com> +# Copyright (c) 2007-2009 by Nicolas François <nicolas.francois@centraliens.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=head1 NAME + +Locale::Po4a::Docbook - Convert Docbook XML documents from/to PO files + +=head1 DESCRIPTION + +The po4a (po for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Docbook is a module to help the translation of DocBook XML +documents into other [human] languages. + +Please note that this module is still under heavy development, and not +distributed in official po4a release since we don't feel it to be mature +enough. If you insist on trying, check the CVS out. 
+ +=head1 STATUS OF THIS MODULE + +This module is fully functional, as it relies in the L<Locale::Po4a::Xml> +module. This only defines the translatable tags and attributes. + +The only known issue is that it doesn't handle entities yet, and this includes +the file inclusion entities, but you can translate most of those files alone +(except the typical entities files), and it's usually better to maintain them +separated. + +=head1 SEE ALSO + +L<po4a(7)|po4a.7>, L<Locale::Po4a::TransTractor(3pm)>, L<Locale::Po4a::Xml(3pm)>. + +=head1 AUTHORS + + Jordi Vilalta <jvprat@gmail.com> + +=head1 COPYRIGHT AND LICENSE + + Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com> + Copyright (c) 2007-2009 by Nicolas François <nicolas.francois@centraliens.net> + +This program is free software; you may redistribute it and/or modify it +under the terms of GPL (see the COPYING file). + +=cut + +package Locale::Po4a::Docbook; + +use 5.006; +use strict; +use warnings; + +use Locale::Po4a::Xml; + +use vars qw(@ISA); +@ISA = qw(Locale::Po4a::Xml); + +sub initialize { + my $self = shift; + my %options = @_; + + $self->SUPER::initialize(%options); + $self->{options}{'wrap'}=1; + $self->{options}{'doctype'}=$self->{options}{'doctype'} || 'docbook xml'; + +# AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + + # abbrev; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <abbrev>"; + $self->{options}{'_default_inline'} .= " <abbrev>"; + + # abstract; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <abstract>"; + $self->{options}{'_default_break'} .= " <abstract>"; + + # accel; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <accel>"; + $self->{options}{'_default_inline'} .= " <accel>"; + + # ackno; does not contain text; Formatted as a displayed block + # Replaced by acknowledgements in Docbook v5.0 + $self->{options}{'_default_untranslated'} .= " 
<ackno>"; + $self->{options}{'_default_break'} .= " <ackno>"; + # acknowledgements; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <acknowledgements>"; + $self->{options}{'_default_break'} .= " <acknowledgements>"; + + # acronym; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <acronym>"; + $self->{options}{'_default_inline'} .= " <acronym>"; + + # action; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <action>"; + $self->{options}{'_default_inline'} .= " <action>"; + + # address; contains text; Formatted as a displayed block; verbatim + $self->{options}{'_default_translated'} .= " W<address>"; + $self->{options}{'_default_placeholder'} .= " <address>"; + + # affiliation; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <affiliation>"; + $self->{options}{'_default_inline'} .= " <affiliation>"; + + # alt; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <alt>"; + $self->{options}{'_default_inline'} .= " <alt>"; + + # anchor; does not contain text; Produces no output + $self->{options}{'_default_untranslated'} .= " <anchor>"; + $self->{options}{'_default_inline'} .= " <anchor>"; + + # annotation; does not contain text; + $self->{options}{'_default_untranslated'} .= " <annotation>"; + $self->{options}{'_default_placeholder'} .= " <annotation>"; + + # answer; does not contain text; + $self->{options}{'_default_untranslated'} .= " <answer>"; + $self->{options}{'_default_break'} .= " <answer>"; + + # appendix; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <appendix>"; + $self->{options}{'_default_break'} .= " <appendix>"; + + # appendixinfo; does not contain text; v4, not in v5 + 
$self->{options}{'_default_untranslated'} .= " <appendixinfo>"; + $self->{options}{'_default_placeholder'} .= " <appendixinfo>"; + + # application; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <application>"; + $self->{options}{'_default_inline'} .= " <application>"; + + # arc; does not contain text; + $self->{options}{'_default_untranslated'} .= " <arc>"; + $self->{options}{'_default_inline'} .= " <arc>"; + + # area; does not contain text; + # NOTE: the area is not translatable as is, but the coords + # attribute might be. + $self->{options}{'_default_untranslated'} .= " <area>"; + $self->{options}{'_default_inline'} .= " <area>"; + + # areaset; does not contain text; + # NOTE: the areaset is not translatable as is. depending on the + # language there might be more or less area tags inside. + $self->{options}{'_default_untranslated'} .= " <areaset>"; + $self->{options}{'_default_inline'} .= " <areaset>"; + + # areaspec; does not contain text; + # NOTE: see area and areaset + $self->{options}{'_default_translated'} .= " <areaspec>"; + $self->{options}{'_default_break'} .= " <areaspec>"; + + # arg; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <arg>"; + $self->{options}{'_default_inline'} .= " <arg>"; + + # artheader; does not contain text; renamed to articleinfo in v4.0 + $self->{options}{'_default_untranslated'} .= " <artheader>"; + $self->{options}{'_default_placeholder'} .= " <artheader>"; + + # article; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <article>"; + $self->{options}{'_default_break'} .= " <article>"; + + # articleinfo; does not contain text; v4 only + $self->{options}{'_default_untranslated'} .= " <articleinfo>"; + $self->{options}{'_default_placeholder'} .= " <articleinfo>"; + + # artpagenums; contains text; Formatted inline + # NOTE: could be in the break class + 
$self->{options}{'_default_translated'} .= " <artpagenums>"; + $self->{options}{'_default_inline'} .= " <artpagenums>"; + + # attribution; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <attribution>"; + $self->{options}{'_default_inline'} .= " <attribution>"; + + # audiodata; does not contain text; + # NOTE: the attributes might be translated + $self->{options}{'_default_translated'} .= " <audiodata>"; + $self->{options}{'_default_placeholder'} .= " <audiodata>"; + $self->{options}{'_default_attributes'}.=' <audiodata>fileref'; + + # audioobject; does not contain text; + # NOTE: might be contaioned in a inlinemediaobject + $self->{options}{'_default_translated'} .= " <audioobject>"; + $self->{options}{'_default_placeholder'} .= " <audioobject>"; + + # author; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <author>"; + $self->{options}{'_default_inline'} .= " <author>"; + + # authorblurb; does not contain text; Formatted as a displayed block. 
+ # v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <authorblurb>"; + $self->{options}{'_default_placeholder'} .= " <authorblurb>"; + + # authorgroup; does not contain text; Formatted inline or as a + # displayed block depending on context + # NOTE: given the possible parents, it is probably very rarely + # inlined + $self->{options}{'_default_untranslated'} .= " <authorgroup>"; + $self->{options}{'_default_break'} .= " <authorgroup>"; + + # authorinitials; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <authorinitials>"; + $self->{options}{'_default_inline'} .= " <authorinitials>"; + +# BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB + + # beginpage; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <beginpage>"; + $self->{options}{'_default_break'} .= " <beginpage>"; + + # bibliocoverage; contains text; Formatted inline + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <bibliocoverage>"; + $self->{options}{'_default_inline'} .= " <bibliocoverage>"; + + # bibliodiv; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <bibliodiv>"; + $self->{options}{'_default_break'} .= " <bibliodiv>"; + + # biblioentry; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <biblioentry>"; + $self->{options}{'_default_break'} .= " <biblioentry>"; + + # bibliography; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <bibliography>"; + $self->{options}{'_default_break'} .= " <bibliography>"; + + # bibliographyinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <bibliographyinfo>"; + $self->{options}{'_default_placeholder'} .= " <bibliographyinfo>"; + + # biblioid; contains text; Formatted inline + # NOTE: could be in the break class + 
$self->{options}{'_default_translated'} .= " <biblioid>"; + $self->{options}{'_default_inline'} .= " <biblioid>"; + + # bibliolist; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <bibliolist>"; + $self->{options}{'_default_break'} .= " <bibliolist>"; + + # bibliomisc; contains text; Formatted inline + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <bibliomisc>"; + $self->{options}{'_default_inline'} .= " <bibliomisc>"; + + # bibliomixed; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <bibliomixed>"; + $self->{options}{'_default_placeholder'} .= " <bibliomixed>"; + + # bibliomset; contains text; Formatted as a displayed block + # NOTE: content might need to be inlined, e.g. <bibliomset><title> + $self->{options}{'_default_translated'} .= " <bibliomset>"; + $self->{options}{'_default_placeholder'} .= " <bibliomset>"; + + # biblioref; does not contain text; Formatted inline + $self->{options}{'_default_untranslated'} .= " <biblioref>"; + $self->{options}{'_default_inline'} .= " <biblioref>"; + + # bibliorelation; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <bibliorelation>"; + $self->{options}{'_default_inline'} .= " <bibliorelation>"; + + # biblioset; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <biblioset>"; + $self->{options}{'_default_break'} .= " <biblioset>"; + + # bibliosource; contains text; Formatted inline + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <bibliosource>"; + $self->{options}{'_default_inline'} .= " <bibliosource>"; + + # blockinfo; does not contain text; v4.2, not in v5 + $self->{options}{'_default_untranslated'} .= " <blockinfo>"; + $self->{options}{'_default_placeholder'} .= " <blockinfo>"; + + # blockquote; does not contain text; Formatted as a displayed block + 
$self->{options}{'_default_untranslated'} .= " <blockquote>"; + $self->{options}{'_default_break'} .= " <blockquote>"; + + # book; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <book>"; + $self->{options}{'_default_break'} .= " <book>"; + + # bookbiblio; does not contain text; Formatted as a displayed block + # Removed in v4.0 + $self->{options}{'_default_untranslated'} .= " <bookbiblio>"; + $self->{options}{'_default_break'} .= " <bookbiblio>"; + + # bookinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <bookinfo>"; + $self->{options}{'_default_placeholder'} .= " <bookinfo>"; + + # bridgehead; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <bridgehead>"; + $self->{options}{'_default_break'} .= " <bridgehead>"; + +# CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC + + # callout; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <callout>"; + $self->{options}{'_default_break'} .= " <callout>"; + + # calloutlist; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <calloutlist>"; + $self->{options}{'_default_break'} .= " <calloutlist>"; + + # caption; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <caption>"; + $self->{options}{'_default_break'} .= " <caption>"; + + # caption (db.html.caption); contains text; Formatted as a displayed block + # TODO: Check if this works + $self->{options}{'_default_translated'} .= " <table><caption>"; + $self->{options}{'_default_break'} .= " <table><caption>"; + + # caution; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <caution>"; + $self->{options}{'_default_break'} .= " <caution>"; + + # chapter; does not contain text; Formatted as a displayed 
block + $self->{options}{'_default_untranslated'} .= " <chapter>"; + $self->{options}{'_default_break'} .= " <chapter>"; + + # chapterinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <chapterinfo>"; + $self->{options}{'_default_placeholder'} .= " <chapterinfo>"; + + # citation; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <citation>"; + $self->{options}{'_default_inline'} .= " <citation>"; + + # citebiblioid; contains text; Formatted inline + # NOTE: maybe untranslated? + $self->{options}{'_default_translated'} .= " <citebiblioid>"; + $self->{options}{'_default_inline'} .= " <citebiblioid>"; + + # citerefentry; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <citerefentry>"; + $self->{options}{'_default_inline'} .= " <citerefentry>"; + + # citetitle; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <citetitle>"; + $self->{options}{'_default_inline'} .= " <citetitle>"; + + # city; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <city>"; + $self->{options}{'_default_inline'} .= " <city>"; + + # classname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <classname>"; + $self->{options}{'_default_inline'} .= " <classname>"; + + # classsynopsis; does not contain text; may be in a para + # NOTE: It may contain a classsynopsisinfo, which should be + # verbatim + # XXX: since it is in untranslated class, does the W flag takes + # effect? 
+ $self->{options}{'_default_untranslated'} .= " W<classsynopsis>"; + $self->{options}{'_default_placeholder'} .= " <classsynopsis>"; + + # classsynopsisinfo; contains text; + # NOTE: see above + $self->{options}{'_default_translated'} .= " W<classsynopsisinfo>"; + $self->{options}{'_default_inline'} .= " <classsynopsisinfo>"; + + # cmdsynopsis; does not contain text; may be in a para + # NOTE: It may be clearer as a verbatim block + # XXX: since it is in untranslated class, does the W flag takes + # effect? => not completely. Rewrap afterward? + $self->{options}{'_default_untranslated'} .= " W<cmdsynopsis>"; + $self->{options}{'_default_placeholder'} .= " <cmdsynopsis>"; + + # co; does not contain text; Formatted inline + # XXX: tranlsated or not? (label attribute) + $self->{options}{'_default_translated'} .= " <co>"; + $self->{options}{'_default_inline'} .= " <co>"; + + # code; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <code>"; + $self->{options}{'_default_inline'} .= " <code>"; + + # col; does not contain text; + # NOTE: could be translated to change the layout in a translation + # To be done on colgroup in that case. 
+ $self->{options}{'_default_untranslated'} .= " <col>"; + $self->{options}{'_default_break'} .= " <col>"; + + # colgroup; does not contain text; + # NOTE: could be translated to change the layout in a translation + $self->{options}{'_default_untranslated'} .= " <colgroup>"; + $self->{options}{'_default_break'} .= " <colgroup>"; + + # collab; does not contain text; Formatted inline or as a + # displayed block depending on context + # NOTE: could be in the break class + $self->{options}{'_default_untranslated'} .= " <collab>"; + $self->{options}{'_default_inline'} .= " <collab>"; + + # collabname; contains text; Formatted inline or as a + # displayed block depending on context; v4, not in v5 + $self->{options}{'_default_translated'} .= " <collabname>"; + $self->{options}{'_default_inline'} .= " <collabname>"; + + # colophon; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <colophon>"; + $self->{options}{'_default_break'} .= " <colophon>"; + + # colspec; does not contain text; + # NOTE: could be translated to change the layout in a translation + $self->{options}{'_default_untranslated'} .= " <colspec>"; + $self->{options}{'_default_break'} .= " <colspec>"; + + # command; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <command>"; + $self->{options}{'_default_inline'} .= " <command>"; + + # comment; contains text; Formatted inline or as a displayed block + # Renamed to remark in v4.0 + $self->{options}{'_default_translated'} .= " <comment>"; + $self->{options}{'_default_inline'} .= " <comment>"; + + # computeroutput; contains text; Formatted inline + # NOTE: "is not a verbatim environment, but an inline." 
+ $self->{options}{'_default_translated'} .= " <computeroutput>"; + $self->{options}{'_default_inline'} .= " <computeroutput>"; + + # confdates; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <confdates>"; + $self->{options}{'_default_inline'} .= " <confdates>"; + + # confgroup; does not contain text; Formatted inline or as a + # displayed block depending on context + # NOTE: could be in the break class + $self->{options}{'_default_untranslated'} .= " <confgroup>"; + $self->{options}{'_default_inline'} .= " <confgroup>"; + + # confnum; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <confnum>"; + $self->{options}{'_default_inline'} .= " <confnum>"; + + # confsponsor; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <confsponsor>"; + $self->{options}{'_default_inline'} .= " <confsponsor>"; + + # conftitle; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <conftitle>"; + $self->{options}{'_default_inline'} .= " <conftitle>"; + + # constant; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <constant>"; + $self->{options}{'_default_inline'} .= " <constant>"; + + # constraint; does not contain text; + # NOTE: it might be better to have the production as verbatim + # Keeping the constrainst inline to have it close to the + # lhs or rhs. 
+ # The attribute is translatable + $self->{options}{'_default_untranslated'} .= " <constraint>"; + $self->{options}{'_default_break'} .= " <constraint>"; + + # constraintdef; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <constraintdef>"; + $self->{options}{'_default_break'} .= " <constraintdef>"; + + # constructorsynopsis; does not contain text; may be in a para + # NOTE: It may be clearer as a verbatim block + # XXX: since it is in untranslated class, does the W flag takes + # effect? + $self->{options}{'_default_untranslated'} .= " W<constructorsynopsis>"; + $self->{options}{'_default_placeholder'} .= " <constructorsynopsis>"; + + # contractnum; contains text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <contractnum>"; + $self->{options}{'_default_inline'} .= " <contractnum>"; + + # contractsponsor; contains text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <contractsponsor>"; + $self->{options}{'_default_inline'} .= " <contractsponsor>"; + + # contrib; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_translated'} .= " <contrib>"; + $self->{options}{'_default_inline'} .= " <contrib>"; + + # copyright; contains text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <copyright>"; + $self->{options}{'_default_inline'} .= " <copyright>"; + + # coref; does not contain text; Formatted inline + # XXX: tranlsated or not? 
(label attribute) + $self->{options}{'_default_translated'} .= " <coref>"; + $self->{options}{'_default_inline'} .= " <coref>"; + + # corpauthor; contains text; Formatted inline or as a + # displayed block depending on context; v4, not in v5 + $self->{options}{'_default_translated'} .= " <corpauthor>"; + $self->{options}{'_default_inline'} .= " <corpauthor>"; + + # corpcredit; contains text; Formatted inline or as a + # displayed block depending on context; v4, not in v5 + $self->{options}{'_default_translated'} .= " <corpcredit>"; + $self->{options}{'_default_inline'} .= " <corpcredit>"; + + # corpname; contains text; Formatted inline or as a + # displayed block depending on context; v4, not in v5 + $self->{options}{'_default_translated'} .= " <corpname>"; + $self->{options}{'_default_inline'} .= " <corpname>"; + + # country; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <country>"; + $self->{options}{'_default_inline'} .= " <country>"; + + # cover; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <cover>"; + $self->{options}{'_default_break'} .= " <cover>"; + +# DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD + + # database; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <database>"; + $self->{options}{'_default_inline'} .= " <database>"; + + # date; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <date>"; + $self->{options}{'_default_inline'} .= " <date>"; + + # dedication; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <dedication>"; + $self->{options}{'_default_break'} .= " <dedication>"; + + # destructorsynopsis; does not contain text; may be in a para + # NOTE: It may be clearer as a verbatim block + # XXX: since it is in untranslated class, does the W flag takes + # effect? 
+ $self->{options}{'_default_untranslated'} .= " W<destructorsynopsis>"; + $self->{options}{'_default_placeholder'} .= " <destructorsynopsis>"; + + # docinfo; does not contain text; removed in v4.0 + $self->{options}{'_default_untranslated'} .= " <docinfo>"; + $self->{options}{'_default_placeholder'} .= " <docinfo>"; + +# EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE + + # edition; contains text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <edition>"; + $self->{options}{'_default_inline'} .= " <edition>"; + + # editor; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <editor>"; + $self->{options}{'_default_inline'} .= " <editor>"; + + # email; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <email>"; + $self->{options}{'_default_inline'} .= " <email>"; + + # emphasis; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <emphasis>"; + $self->{options}{'_default_inline'} .= " <emphasis>"; + + # entry; contains text; + $self->{options}{'_default_translated'} .= " <entry>"; + $self->{options}{'_default_break'} .= " <entry>"; + + # entrytbl; does not contain text; + $self->{options}{'_default_untranslated'} .= " <entrytbl>"; + $self->{options}{'_default_break'} .= " <entrytbl>"; + + # envar; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <envar>"; + $self->{options}{'_default_inline'} .= " <envar>"; + + # epigraph; contains text; Formatted as a displayed block. + # NOTE: maybe contained in a para + $self->{options}{'_default_translated'} .= " <epigraph>"; + $self->{options}{'_default_placeholder'} .= " <epigraph>"; + + # equation; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <equation>"; + $self->{options}{'_default_break'} .= " <equation>"; + + # errorcode; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <errorcode>"; + $self->{options}{'_default_inline'} .= " <errorcode>"; + + # errorname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <errorname>"; + $self->{options}{'_default_inline'} .= " <errorname>"; + + # errortext; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <errortext>"; + $self->{options}{'_default_inline'} .= " <errortext>"; + + # errortype; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <errortype>"; + $self->{options}{'_default_inline'} .= " <errortype>"; + + # example; does not contain text; Formatted as a displayed block. + # NOTE: maybe contained in a para + $self->{options}{'_default_untranslated'} .= " <example>"; + $self->{options}{'_default_placeholder'} .= " <example>"; + + # exceptionname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <exceptionname>"; + $self->{options}{'_default_inline'} .= " <exceptionname>"; + + # extendedlink; does not contain text; + $self->{options}{'_default_untranslated'} .= " <extendedlink>"; + $self->{options}{'_default_inline'} .= " <extendedlink>"; + +# FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF + + # fax; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <fax>"; + $self->{options}{'_default_inline'} .= " <fax>"; + + # fieldsynopsis; does not contain text; may be in a para + $self->{options}{'_default_untranslated'} .= " <fieldsynopsis>"; + $self->{options}{'_default_inline'} .= " <fieldsynopsis>"; + + # figure; does not contain text; Formatted as a displayed block. 
+ # NOTE: maybe contained in a para + $self->{options}{'_default_untranslated'} .= " <figure>"; + $self->{options}{'_default_placeholder'} .= " <figure>"; + + # filename; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <filename>"; + $self->{options}{'_default_inline'} .= " <filename>"; + + # firstname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <firstname>"; + $self->{options}{'_default_inline'} .= " <firstname>"; + + # firstterm; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <firstterm>"; + $self->{options}{'_default_inline'} .= " <firstterm>"; + + # footnote; contains text; + $self->{options}{'_default_translated'} .= " <footnote>"; + $self->{options}{'_default_placeholder'} .= " <footnote>"; + + # footnoteref; contains text; + $self->{options}{'_default_translated'} .= " <footnoteref>"; + $self->{options}{'_default_inline'} .= " <footnoteref>"; + + # foreignphrase; contains text; + $self->{options}{'_default_translated'} .= " <foreignphrase>"; + $self->{options}{'_default_inline'} .= " <foreignphrase>"; + + # formalpara; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <formalpara>"; + $self->{options}{'_default_break'} .= " <formalpara>"; + + # funcdef; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <funcdef>"; + $self->{options}{'_default_inline'} .= " <funcdef>"; + + # funcparams; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <funcparams>"; + $self->{options}{'_default_inline'} .= " <funcparams>"; + + # funcprototype; does not contain text; + # NOTE: maybe contained in a funcsynopsis, contained in a para + $self->{options}{'_default_untranslated'} .= " <funcprototype>"; + $self->{options}{'_default_placeholder'} .= " <funcprototype>"; + + # funcsynopsis; does not contain text; + # NOTE: maybe contained in a para + $self->{options}{'_default_untranslated'} .= " <funcsynopsis>"; + $self->{options}{'_default_placeholder'} .= " <funcsynopsis>"; + + # funcsynopsisinfo; contains text; verbatim + # NOTE: maybe contained in a funcsynopsis, contained in a para + $self->{options}{'_default_translated'} .= " W<funcsynopsisinfo>"; + $self->{options}{'_default_placeholder'} .= " <funcsynopsisinfo>"; + + # function; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <function>"; + $self->{options}{'_default_inline'} .= " <function>"; + +# GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG + + # glossary; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <glossary>"; + $self->{options}{'_default_break'} .= " <glossary>"; + + # glossaryinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <glossaryinfo>"; + $self->{options}{'_default_placeholder'} .= " <glossaryinfo>"; + + # glossdef; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <glossdef>"; + $self->{options}{'_default_break'} .= " <glossdef>"; + + # glossdiv; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <glossdiv>"; + $self->{options}{'_default_break'} .= " <glossdiv>"; + + # glossentry; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <glossentry>"; + $self->{options}{'_default_break'} .= " <glossentry>"; + + # glosslist; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <glosslist>"; + $self->{options}{'_default_break'} .= " <glosslist>"; + + # glosssee; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <glosssee>"; + $self->{options}{'_default_break'} .= " <glosssee>"; + + # glossseealso; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <glossseealso>"; + $self->{options}{'_default_break'} .= " <glossseealso>"; + + # glossterm; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <glossterm>"; + $self->{options}{'_default_inline'} .= " <glossterm>"; + + # graphic; does not contain text; Formatted as a displayed block + # v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <graphic>"; + $self->{options}{'_default_inline'} .= " <graphic>"; + $self->{options}{'_default_attributes'}.=' <graphic>fileref'; + + # graphicco; does not contain text; Formatted as a displayed block. 
+ # v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <graphicco>"; + $self->{options}{'_default_placeholder'} .= " <graphicco>"; + + # group; does not contain text; Formatted inline + $self->{options}{'_default_untranslated'} .= " W<group>"; + $self->{options}{'_default_inline'} .= " <group>"; + + # guibutton; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guibutton>"; + $self->{options}{'_default_inline'} .= " <guibutton>"; + + # guiicon; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guiicon>"; + $self->{options}{'_default_inline'} .= " <guiicon>"; + + # guilabel; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guilabel>"; + $self->{options}{'_default_inline'} .= " <guilabel>"; + + # guimenu; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guimenu>"; + $self->{options}{'_default_inline'} .= " <guimenu>"; + + # guimenuitem; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guimenuitem>"; + $self->{options}{'_default_inline'} .= " <guimenuitem>"; + + # guisubmenu; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <guisubmenu>"; + $self->{options}{'_default_inline'} .= " <guisubmenu>"; + +# HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH + + # hardware; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <hardware>"; + $self->{options}{'_default_inline'} .= " <hardware>"; + + # highlights; does not contain text; Formatted inline + # v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <highlights>"; + $self->{options}{'_default_break'} .= " <highlights>"; + + # holder; contains text; + # NOTE: may depend on the copyright container + $self->{options}{'_default_translated'} .= " <holder>"; + $self->{options}{'_default_inline'} .= " <holder>"; + + # honorific; contains text; Formatted inline + 
$self->{options}{'_default_translated'} .= " <honorific>"; + $self->{options}{'_default_inline'} .= " <honorific>"; + + # html:button; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:button>"; + $self->{options}{'_default_inline'} .= " <html:button>"; + + # html:fieldset; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:fieldset>"; + $self->{options}{'_default_inline'} .= " <html:fieldset>"; + + # html:form; does not contain text; + $self->{options}{'_default_translated'} .= " <html:form>"; + $self->{options}{'_default_inline'} .= " <html:form>"; + + # html:input; does not contain text; Formatted inline + # NOTE: attributes are translatable + $self->{options}{'_default_translated'} .= " <html:input>"; + $self->{options}{'_default_inline'} .= " <html:input>"; + + # html:label; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:label>"; + $self->{options}{'_default_inline'} .= " <html:label>"; + + # html:legend; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:legend>"; + $self->{options}{'_default_inline'} .= " <html:legend>"; + + # html:option; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:option>"; + $self->{options}{'_default_inline'} .= " <html:option>"; + + # html:select; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <html:select>"; + $self->{options}{'_default_inline'} .= " <html:select>"; + + # html:textarea; contains text; Formatted as a displayed block. 
+ $self->{options}{'_default_translated'} .= " <html:textarea>"; + $self->{options}{'_default_placeholder'} .= " <html:textarea>"; + + # imagedata; does not contain text; May be formatted inline or + # as a displayed block, depending on context + $self->{options}{'_default_translated'} .= " <imagedata>"; + $self->{options}{'_default_inline'} .= " <imagedata>"; + $self->{options}{'_default_attributes'}.=' <imagedata>fileref'; + + # imageobject; does not contain text; May be formatted inline or + # as a displayed block, depending on context + $self->{options}{'_default_untranslated'} .= " <imageobject>"; + $self->{options}{'_default_inline'} .= " <imageobject>"; + + # imageobjectco; does not contain text; Formatted as a displayed block + # NOTE: may be in a inlinemediaobject + # TODO: check if this works when the inlinemediaobject is defined + # as inline + $self->{options}{'_default_untranslated'} .= " <imageobjectco>"; + $self->{options}{'_default_break'} .= " <imageobjectco>"; + + # important; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <important>"; + $self->{options}{'_default_break'} .= " <important>"; + + # index; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <index>"; + $self->{options}{'_default_break'} .= " <index>"; + + # indexdiv; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <indexdiv>"; + $self->{options}{'_default_break'} .= " <indexdiv>"; + + # indexentry; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <indexentry>"; + $self->{options}{'_default_break'} .= " <indexentry>"; + + # indexinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <indexinfo>"; + $self->{options}{'_default_placeholder'} .= " <indexinfo>"; + + # indexterm; does not contain text; + $self->{options}{'_default_untranslated'} .= " <indexterm>"; + $self->{options}{'_default_placeholder'} .= " <indexterm>"; + + # info; does not contain text; + $self->{options}{'_default_untranslated'} .= " <info>"; + $self->{options}{'_default_placeholder'} .= " <info>"; + + # informalequation; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <informalequation>"; + $self->{options}{'_default_placeholder'} .= " <informalequation>"; + + # informalexample; does not contain text; Formatted as a displayed block. + # NOTE: can be in a para + $self->{options}{'_default_untranslated'} .= " <informalexample>"; + $self->{options}{'_default_break'} .= " <informalexample>"; + + # informalfigure; does not contain text; Formatted as a displayed block. + # NOTE: can be in a para + $self->{options}{'_default_untranslated'} .= " <informalfigure>"; + $self->{options}{'_default_break'} .= " <informalfigure>"; + + # informaltable; does not contain text; Formatted as a displayed block. 
+ # NOTE: can be in a para + $self->{options}{'_default_untranslated'} .= " <informaltable>"; + $self->{options}{'_default_break'} .= " <informaltable>"; + + # initializer; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <initializer>"; + $self->{options}{'_default_inline'} .= " <initializer>"; + + # inlineequation; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " W<inlineequation>"; + $self->{options}{'_default_placeholder'} .= " <inlineequation>"; + + # inlinegraphic; does not contain text; Formatted inline + # empty; v4, not in v5 + $self->{options}{'_default_translated'} .= " W<inlinegraphic>"; + $self->{options}{'_default_inline'} .= " <inlinegraphic>"; + + # inlinemediaobject; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <inlinemediaobject>"; + $self->{options}{'_default_placeholder'} .= " <inlinemediaobject>"; + + # interface; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <interface>"; + $self->{options}{'_default_inline'} .= " <interface>"; + + # interfacedefinition; contains text; Formatted inline + # Removed in v4.0 + $self->{options}{'_default_translated'} .= " <interfacedefinition>"; + $self->{options}{'_default_inline'} .= " <interfacedefinition>"; + + # interfacename; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <interfacename>"; + $self->{options}{'_default_inline'} .= " <interfacename>"; + + # invpartnumber; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <invpartnumber>"; + $self->{options}{'_default_inline'} .= " <invpartnumber>"; + + # isbn; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <isbn>"; + $self->{options}{'_default_inline'} .= " <isbn>"; + + # issn; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <issn>"; + 
$self->{options}{'_default_inline'} .= " <issn>"; + + # issuenum; contains text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <issuenum>"; + $self->{options}{'_default_inline'} .= " <issuenum>"; + + # itemizedlist; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <itemizedlist>"; + $self->{options}{'_default_break'} .= " <itemizedlist>"; + + # itermset; does not contain text; + # FIXME + $self->{options}{'_default_untranslated'} .= " <itermset>"; + $self->{options}{'_default_inline'} .= " <itermset>"; + +# JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ + + # jobtitle; contains text; Formatted inline or as a displayed block + # NOTE: can be in a para + $self->{options}{'_default_translated'} .= " <jobtitle>"; + $self->{options}{'_default_inline'} .= " <jobtitle>"; + +# KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK + + # keycap; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <keycap>"; + $self->{options}{'_default_inline'} .= " <keycap>"; + + # keycode; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <keycode>"; + $self->{options}{'_default_inline'} .= " <keycode>"; + + # keycombo; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <keycombo>"; + $self->{options}{'_default_inline'} .= " <keycombo>"; + + # keysym; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <keysym>"; + $self->{options}{'_default_inline'} .= " <keysym>"; + + # keyword; contains text; + # NOTE: could be inline + $self->{options}{'_default_translated'} .= " <keyword>"; + $self->{options}{'_default_break'} .= " <keyword>"; + + # keywordset; contains text; Formatted inline or as a displayed block + # NOTE: could be placeholder/break + $self->{options}{'_default_translated'} .= " 
<keywordset>"; + $self->{options}{'_default_break'} .= " <keywordset>"; + +# LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL + + # label; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <label>"; + $self->{options}{'_default_break'} .= " <label>"; + + # legalnotice; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <legalnotice>"; + $self->{options}{'_default_break'} .= " <legalnotice>"; + + # lhs; contains text; Formatted as a displayed block. + # NOTE: it might be better to have the production as verbatim + # Keeping the constrainst inline to have it close to the + # lhs or rhs. + $self->{options}{'_default_translated'} .= " <lhs>"; + $self->{options}{'_default_break'} .= " <lhs>"; + + # lineage; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <lineage>"; + $self->{options}{'_default_inline'} .= " <lineage>"; + + # lineannotation; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <lineannotation>"; + $self->{options}{'_default_inline'} .= " <lineannotation>"; + + # link; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <link>"; + $self->{options}{'_default_inline'} .= " <link>"; + + # listitem; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <listitem>"; + $self->{options}{'_default_break'} .= " <listitem>"; + + # literal; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <literal>"; + $self->{options}{'_default_inline'} .= " <literal>"; + + # literallayout; contains text; verbatim + $self->{options}{'_default_translated'} .= " W<literallayout>"; + $self->{options}{'_default_placeholder'} .= " <literallayout>"; + + # locator; does not contain text; + $self->{options}{'_default_untranslated'} .= " <locator>"; + $self->{options}{'_default_inline'} .= " <locator>"; + + # lot; does not contain text; Formatted as a displayed block. + # v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <lot>"; + $self->{options}{'_default_break'} .= " <lot>"; + + # lotentry; contains text; Formatted as a displayed block. + # v4, not in v5 + $self->{options}{'_default_translated'} .= " <lotentry>"; + $self->{options}{'_default_break'} .= " <lotentry>"; + +# MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM + + # manvolnum; contains text; + $self->{options}{'_default_translated'} .= " <manvolnum>"; + $self->{options}{'_default_inline'} .= " <manvolnum>"; + + # markup; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <markup>"; + $self->{options}{'_default_inline'} .= " <markup>"; + + # mathphrase; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <mathphrase>"; + $self->{options}{'_default_inline'} .= " <mathphrase>"; + + # medialabel; contains text; Formatted inline + # v4, not in v5 + $self->{options}{'_default_translated'} .= " <medialabel>"; + $self->{options}{'_default_inline'} .= " <medialabel>"; + + # mediaobject; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <mediaobject>"; + $self->{options}{'_default_placeholder'} .= " <mediaobject>"; + + # mediaobjectco; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <mediaobjectco>"; + $self->{options}{'_default_placeholder'} .= " <mediaobjectco>"; + + # member; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <member>"; + $self->{options}{'_default_inline'} .= " <member>"; + + # menuchoice; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <menuchoice>"; + $self->{options}{'_default_inline'} .= " <menuchoice>"; + + # methodname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <methodname>"; + $self->{options}{'_default_inline'} .= " <methodname>"; + + # methodparam; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <methodparam>"; + $self->{options}{'_default_inline'} .= " <methodparam>"; + + # methodsynopsis; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <methodsynopsis>"; + $self->{options}{'_default_inline'} .= " <methodsynopsis>"; + + # modifier; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <modifier>"; + $self->{options}{'_default_inline'} .= " <modifier>"; + + # mousebutton; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <mousebutton>"; + $self->{options}{'_default_inline'} .= " <mousebutton>"; + + # msg; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msg>"; + $self->{options}{'_default_break'} .= " <msg>"; + + # msgaud; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <msgaud>"; + $self->{options}{'_default_break'} .= " <msgaud>"; + + # msgentry; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <msgentry>"; + $self->{options}{'_default_break'} .= " <msgentry>"; + + # msgexplan; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgexplan>"; + $self->{options}{'_default_break'} .= " <msgexplan>"; + + # msginfo; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msginfo>"; + $self->{options}{'_default_break'} .= " <msginfo>"; + + # msglevel; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <msglevel>"; + $self->{options}{'_default_break'} .= " <msglevel>"; + + # msgmain; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgmain>"; + $self->{options}{'_default_break'} .= " <msgmain>"; + + # msgorig; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <msgorig>"; + $self->{options}{'_default_break'} .= " <msgorig>"; + + # msgrel; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgrel>"; + $self->{options}{'_default_break'} .= " <msgrel>"; + + # msgset; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgset>"; + $self->{options}{'_default_placeholder'} .= " <msgset>"; + + # msgsub; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <msgsub>"; + $self->{options}{'_default_break'} .= " <msgsub>"; + + # msgtext; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <msgtext>"; + $self->{options}{'_default_break'} .= " <msgtext>"; + +# NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN + + # nonterminal; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <nonterminal>"; + $self->{options}{'_default_inline'} .= " <nonterminal>"; + + # note; does not contain text; Formatted inline + # NOTE: can be in a para + $self->{options}{'_default_untranslated'} .= " <note>"; + $self->{options}{'_default_inline'} .= " <note>"; + +# OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO + + # objectinfo; does not contain text; v3.1 -> v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <objectinfo>"; + $self->{options}{'_default_placeholder'} .= " <objectinfo>"; + + # olink; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <olink>"; + $self->{options}{'_default_inline'} .= " <olink>"; + + # ooclass; does not contain text; Formatted inline + $self->{options}{'_default_translated'} .= " <ooclass>"; + $self->{options}{'_default_inline'} .= " <ooclass>"; + + # ooexception; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <ooexception>"; + $self->{options}{'_default_inline'} .= " <ooexception>"; + + # oointerface; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <oointerface>"; + $self->{options}{'_default_inline'} .= " <oointerface>"; + + # option; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <option>"; + $self->{options}{'_default_inline'} .= " <option>"; + + # optional; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <optional>"; + $self->{options}{'_default_inline'} .= " <optional>"; + + # orderedlist; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <orderedlist>"; + $self->{options}{'_default_placeholder'} .= " <orderedlist>"; + + # org; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <org>"; + $self->{options}{'_default_inline'} .= " <org>"; + + # orgdiv; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <orgdiv>"; + $self->{options}{'_default_inline'} .= " <orgdiv>"; + + # orgname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <orgname>"; + $self->{options}{'_default_inline'} .= " <orgname>"; + + # otheraddr; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <otheraddr>"; + $self->{options}{'_default_inline'} .= " <otheraddr>"; + + # othercredit; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <othercredit>"; + $self->{options}{'_default_inline'} .= " <othercredit>"; + + # othername; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <othername>"; + $self->{options}{'_default_inline'} .= " <othername>"; + +# PPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP + + # package; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <package>"; + $self->{options}{'_default_inline'} .= " <package>"; + + # pagenums; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <pagenums>"; + $self->{options}{'_default_inline'} .= " <pagenums>"; + + # para; contains text; Formatted as a displayed block + $self->{options}{'_default_translated'} .= " <para>"; + $self->{options}{'_default_break'} .= " <para>"; + + # paramdef; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <paramdef>"; + $self->{options}{'_default_inline'} .= " <paramdef>"; + + # parameter; contains text; 
Formatted inline + $self->{options}{'_default_translated'} .= " <parameter>"; + $self->{options}{'_default_inline'} .= " <parameter>"; + + # part; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <part>"; + $self->{options}{'_default_break'} .= " <part>"; + + # partinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <partinfo>"; + $self->{options}{'_default_placeholder'} .= " <partinfo>"; + + # partintro; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <partintro>"; + $self->{options}{'_default_break'} .= " <partintro>"; + + # person; does not contain text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_untranslated'} .= " <person>"; + $self->{options}{'_default_inline'} .= " <person>"; + + # personblurb; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <personblurb>"; + $self->{options}{'_default_placeholder'} .= " <personblurb>"; + + # personname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <personname>"; + $self->{options}{'_default_inline'} .= " <personname>"; + + # phone; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <phone>"; + $self->{options}{'_default_inline'} .= " <phone>"; + + # phrase; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <phrase>"; + $self->{options}{'_default_inline'} .= " <phrase>"; + + # pob; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <pob>"; + $self->{options}{'_default_inline'} .= " <pob>"; + + # postcode; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <postcode>"; + $self->{options}{'_default_inline'} .= " <postcode>"; + + # preface; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <preface>"; + $self->{options}{'_default_break'} .= " <preface>"; + + # prefaceinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <prefaceinfo>"; + $self->{options}{'_default_placeholder'} .= " <prefaceinfo>"; + + # primary; contains text; + $self->{options}{'_default_translated'} .= " <primary>"; + $self->{options}{'_default_break'} .= " <primary>"; + + # primaryie; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <primaryie>"; + $self->{options}{'_default_break'} .= " <primaryie>"; + + # printhistory; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <printhistory>"; + $self->{options}{'_default_break'} .= " <printhistory>"; + + # procedure; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <procedure>"; + $self->{options}{'_default_placeholder'} .= " <procedure>"; + + # production; doesnot contain text; + # NOTE: it might be better to have the production as verbatim + # Keeping the constrainst inline to have it close to the + # lhs or rhs. + $self->{options}{'_default_untranslated'} .= " <production>"; + $self->{options}{'_default_break'} .= " <production>"; + + # productionrecap; does not contain text; like production + $self->{options}{'_default_untranslated'} .= " <productionrecap>"; + $self->{options}{'_default_break'} .= " <productionrecap>"; + + # productionset; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <productionset>"; + $self->{options}{'_default_placeholder'} .= " <productionset>"; + + # productname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <productname>"; + $self->{options}{'_default_inline'} .= " <productname>"; + + # productnumber; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <productnumber>"; + $self->{options}{'_default_inline'} .= " <productnumber>"; + + # programlisting; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " W<programlisting>"; + $self->{options}{'_default_placeholder'} .= " <programlisting>"; + + # programlistingco; contains text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <programlistingco>"; + $self->{options}{'_default_placeholder'} .= " <programlistingco>"; + + # prompt; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <prompt>"; + $self->{options}{'_default_inline'} .= " <prompt>"; + + # property; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <property>"; + $self->{options}{'_default_inline'} .= " <property>"; + + # pubdate; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <pubdate>"; + $self->{options}{'_default_inline'} .= " <pubdate>"; + + # publisher; does not contain text; Formatted inline or as a displayed block + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <publisher>"; + $self->{options}{'_default_inline'} .= " <publisher>"; + + # publishername; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_translated'} .= " <publishername>"; + $self->{options}{'_default_inline'} .= " <publishername>"; + +# QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ + + # qandadiv; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <qandadiv>"; + $self->{options}{'_default_break'} .= " <qandadiv>"; + + # qandaentry; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <qandaentry>"; + $self->{options}{'_default_break'} .= " <qandaentry>"; + + # qandaset; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <qandaset>"; + $self->{options}{'_default_break'} .= " <qandaset>"; + + # question; does not contain text; + $self->{options}{'_default_untranslated'} .= " <question>"; + $self->{options}{'_default_break'} .= " <question>"; + + # quote; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <quote>"; + $self->{options}{'_default_inline'} .= " <quote>"; + +# RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR + + # refclass; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <refclass>"; + $self->{options}{'_default_break'} .= " <refclass>"; + + # refdescriptor; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <refdescriptor>"; + $self->{options}{'_default_break'} .= " <refdescriptor>"; + + # refentry; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refentry>"; + $self->{options}{'_default_break'} .= " <refentry>"; + + # refentryinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <refentryinfo>"; + $self->{options}{'_default_placeholder'} .= " <refentryinfo>"; + + # refentrytitle; contains text; Formatted as a displayed block +# FIXME: do not seems to be a block + $self->{options}{'_default_translated'} .= " <refentrytitle>"; + $self->{options}{'_default_inline'} .= " <refentrytitle>"; + + # reference; does not contain 
text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <reference>"; + $self->{options}{'_default_break'} .= " <reference>"; + + # referenceinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <referenceinfo>"; + $self->{options}{'_default_placeholder'} .= " <referenceinfo>"; + + # refmeta; does not contains text; + # NOTE: could be in the inline class + $self->{options}{'_default_untranslated'} .= " <refmeta>"; + $self->{options}{'_default_break'} .= " <refmeta>"; + + # refmiscinfo; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <refmiscinfo>"; + $self->{options}{'_default_break'} .= " <refmiscinfo>"; + + # refname; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <refname>"; + $self->{options}{'_default_break'} .= " <refname>"; + + # refnamediv; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refnamediv>"; + $self->{options}{'_default_break'} .= " <refnamediv>"; + + # refpurpose; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <refpurpose>"; + $self->{options}{'_default_inline'} .= " <refpurpose>"; + + # refsect1; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refsect1>"; + $self->{options}{'_default_break'} .= " <refsect1>"; + + # refsect1info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <refsect1info>"; + $self->{options}{'_default_placeholder'} .= " <refsect1info>"; + + # refsect2; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refsect2>"; + $self->{options}{'_default_break'} .= " <refsect2>"; + + # refsect2info; does not contain text; v4, not in v5 + 
$self->{options}{'_default_untranslated'} .= " <refsect2info>"; + $self->{options}{'_default_placeholder'} .= " <refsect2info>"; + + # refsect3; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refsect3>"; + $self->{options}{'_default_break'} .= " <refsect3>"; + + # refsect3info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <refsect3info>"; + $self->{options}{'_default_placeholder'} .= " <refsect3info>"; + + # refsection; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refsection>"; + $self->{options}{'_default_break'} .= " <refsection>"; + + # refsectioninfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <refsectioninfo>"; + $self->{options}{'_default_placeholder'} .= " <refsectioninfo>"; + + # refsynopsisdiv; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <refsynopsisdiv>"; + $self->{options}{'_default_break'} .= " <refsynopsisdiv>"; + + # refsynopsisdivinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <refsynopsisdivinfo>"; + $self->{options}{'_default_placeholder'} .= " <refsynopsisdivinfo>"; + + # releaseinfo; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <releaseinfo>"; + $self->{options}{'_default_break'} .= " <releaseinfo>"; + + # remark; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_translated'} .= " <remark>"; + $self->{options}{'_default_inline'} .= " <remark>"; + + # replaceable; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <replaceable>"; + $self->{options}{'_default_inline'} .= " <replaceable>"; + + # returnvalue; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " 
<returnvalue>"; + $self->{options}{'_default_inline'} .= " <returnvalue>"; + + # revdescription; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_translated'} .= " <revdescription>"; + $self->{options}{'_default_break'} .= " <revdescription>"; + + # revhistory; does not contain text; Formatted as a displayed block + $self->{options}{'_default_untranslated'} .= " <revhistory>"; + $self->{options}{'_default_break'} .= " <revhistory>"; + + # revision; does not contain text; + $self->{options}{'_default_untranslated'} .= " <revision>"; + $self->{options}{'_default_break'} .= " <revision>"; + + # revnumber; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <revnumber>"; + $self->{options}{'_default_inline'} .= " <revnumber>"; + + # revremark; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_translated'} .= " <revremark>"; + $self->{options}{'_default_break'} .= " <revremark>"; + + # rhs; contains text; Formatted as a displayed block. + # NOTE: it might be better to have the production as verbatim + # Keeping the constrainst inline to have it close to the + # lhs or rhs. + $self->{options}{'_default_translated'} .= " <rhs>"; + $self->{options}{'_default_break'} .= " <rhs>"; + + # row; does not contain text; + $self->{options}{'_default_untranslated'} .= " <row>"; + $self->{options}{'_default_break'} .= " <row>"; + +# SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS + + # sbr; does not contain text; line break + $self->{options}{'_default_untranslated'} .= " <sbr>"; + $self->{options}{'_default_break'} .= " <sbr>"; + + # screen; contains text; verbatim + $self->{options}{'_default_translated'} .= " W<screen>"; + $self->{options}{'_default_placeholder'} .= " <screen>"; + + # screenco; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <screenco>"; + $self->{options}{'_default_placeholder'} .= " <screenco>"; + + # screeninfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <screeninfo>"; + $self->{options}{'_default_placeholder'} .= " <screeninfo>"; + + # screenshot; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <screenshot>"; + $self->{options}{'_default_placeholder'} .= " <screenshot>"; + + # secondary; contains text; + $self->{options}{'_default_translated'} .= " <secondary>"; + $self->{options}{'_default_break'} .= " <secondary>"; + + # secondaryie; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <secondaryie>"; + $self->{options}{'_default_break'} .= " <secondaryie>"; + + # sect1; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <sect1>"; + $self->{options}{'_default_break'} .= " <sect1>"; + + # sect1info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sect1info>"; + $self->{options}{'_default_placeholder'} .= " <sect1info>"; + + # sect2; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <sect2>"; + $self->{options}{'_default_break'} .= " <sect2>"; + + # sect2info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sect2info>"; + $self->{options}{'_default_placeholder'} .= " <sect2info>"; + + # sect3; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <sect3>"; + $self->{options}{'_default_break'} .= " <sect3>"; + + # sect3info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sect3info>"; + $self->{options}{'_default_placeholder'} .= " <sect3info>"; + + # sect4; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <sect4>"; + $self->{options}{'_default_break'} .= " <sect4>"; + + # sect4info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sect4info>"; + $self->{options}{'_default_placeholder'} .= " <sect4info>"; + + # sect5; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <sect5>"; + $self->{options}{'_default_break'} .= " <sect5>"; + + # sect5info; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sect5info>"; + $self->{options}{'_default_placeholder'} .= " <sect5info>"; + + # section; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <section>"; + $self->{options}{'_default_break'} .= " <section>"; + + # sectioninfo; does not contain text; v3.1 -> v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sectioninfo>"; + $self->{options}{'_default_placeholder'} .= " <sectioninfo>"; + + # see; contains text; + $self->{options}{'_default_translated'} .= " <see>"; + $self->{options}{'_default_break'} .= " <see>"; + + # seealso; contains text; + $self->{options}{'_default_translated'} .= " <seealso>"; + $self->{options}{'_default_break'} .= " <seealso>"; + + # seealsoie; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <seealsoie>"; + $self->{options}{'_default_break'} .= " <seealsoie>"; + + # seeie; contains text; Formatted as a displayed block. 
+ $self->{options}{'_default_translated'} .= " <seeie>"; + $self->{options}{'_default_break'} .= " <seeie>"; + + # seg; contains text; + $self->{options}{'_default_translated'} .= " <seg>"; + $self->{options}{'_default_break'} .= " <seg>"; + + # seglistitem; does not contain text; + $self->{options}{'_default_untranslated'} .= " <seglistitem>"; + $self->{options}{'_default_break'} .= " <seglistitem>"; + + # segmentedlist; does not contain text; + $self->{options}{'_default_untranslated'} .= " <segmentedlist>"; + $self->{options}{'_default_break'} .= " <segmentedlist>"; + + # segtitle; contains text; + $self->{options}{'_default_translated'} .= " <segtitle>"; + $self->{options}{'_default_break'} .= " <segtitle>"; + + # seriesinfo; does not contain text; + # Removed in v4.0 + $self->{options}{'_default_untranslated'} .= " <seriesinfo>"; + $self->{options}{'_default_placeholder'} .= " <seriesinfo>"; + + # seriesvolnums; contains text; Formatted inline + # NOTE: could be in the break class + $self->{options}{'_default_translated'} .= " <seriesvolnums>"; + $self->{options}{'_default_inline'} .= " <seriesvolnums>"; + + # set; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <set>"; + $self->{options}{'_default_break'} .= " <set>"; + + # setindex; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <setindex>"; + $self->{options}{'_default_break'} .= " <setindex>"; + + # setindexinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <setindexinfo>"; + $self->{options}{'_default_placeholder'} .= " <setindexinfo>"; + + # setinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <setinfo>"; + $self->{options}{'_default_placeholder'} .= " <setinfo>"; + + # sgmltag; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <sgmltag>"; + $self->{options}{'_default_inline'} .= " <sgmltag>"; + + # shortaffil; contains text; Formatted inline or as a + # displayed block depending on context + $self->{options}{'_default_translated'} .= " <shortaffil>"; + $self->{options}{'_default_inline'} .= " <shortaffil>"; + + # shortcut; does not contain text; Formatted inline + $self->{options}{'_default_untranslated'} .= " <shortcut>"; + $self->{options}{'_default_inline'} .= " <shortcut>"; + + # sidebar; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <sidebar>"; + $self->{options}{'_default_break'} .= " <sidebar>"; + + # sidebarinfo; does not contain text; v4, not in v5 + $self->{options}{'_default_untranslated'} .= " <sidebarinfo>"; + $self->{options}{'_default_placeholder'} .= " <sidebarinfo>"; + + # simpara; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <simpara>"; + $self->{options}{'_default_break'} .= " <simpara>"; + + # simplelist; does not contain text; + $self->{options}{'_default_untranslated'} .= " <simplelist>"; + $self->{options}{'_default_inline'} .= " <simplelist>"; + + # simplemsgentry; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <simplemsgentry>"; + $self->{options}{'_default_break'} .= " <simplemsgentry>"; + + # simplesect; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <simplesect>"; + $self->{options}{'_default_break'} .= " <simplesect>"; + + # spanspec; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <spanspec>"; + $self->{options}{'_default_break'} .= " <spanspec>"; + + # state; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <state>"; + $self->{options}{'_default_inline'} .= " <state>"; + + # step; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <step>"; + $self->{options}{'_default_break'} .= " <step>"; + + # stepalternatives; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <stepalternatives>"; + $self->{options}{'_default_break'} .= " <stepalternatives>"; + + # street; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <street>"; + $self->{options}{'_default_inline'} .= " <street>"; + + # structfield; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <structfield>"; + $self->{options}{'_default_inline'} .= " <structfield>"; + + # structname; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <structname>"; + $self->{options}{'_default_inline'} .= " <structname>"; + + # subject; does not contain text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_untranslated'} .= " <subject>"; + $self->{options}{'_default_break'} .= " <subject>"; + + # subjectset; does not contain text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_untranslated'} .= " 
<subjectset>"; + $self->{options}{'_default_break'} .= " <subjectset>"; + + # subjectterm; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <subjectterm>"; + $self->{options}{'_default_break'} .= " <subjectterm>"; + + # subscript; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <subscript>"; + $self->{options}{'_default_inline'} .= " <subscript>"; + + # substeps; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <substeps>"; + $self->{options}{'_default_break'} .= " <substeps>"; + + # subtitle; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <subtitle>"; + $self->{options}{'_default_break'} .= " <subtitle>"; + + # superscript; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <superscript>"; + $self->{options}{'_default_inline'} .= " <superscript>"; + + # surname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <surname>"; + $self->{options}{'_default_inline'} .= " <surname>"; + +#svg:svg + + # symbol; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <symbol>"; + $self->{options}{'_default_inline'} .= " <symbol>"; + + # synopfragment; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <synopfragment>"; + $self->{options}{'_default_placeholder'} .= " <synopfragment>"; + + # synopfragmentref; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <synopfragmentref>"; + $self->{options}{'_default_inline'} .= " <synopfragmentref>"; + + # synopsis; contains text; verbatim + $self->{options}{'_default_translated'} .= " W<synopsis>"; + $self->{options}{'_default_placeholder'} .= " <synopsis>"; + + # systemitem; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <systemitem>"; + $self->{options}{'_default_inline'} .= " <systemitem>"; + +# TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT + + # table; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <table>"; + $self->{options}{'_default_placeholder'} .= " <table>"; + + # tag; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <tag>"; + $self->{options}{'_default_inline'} .= " <tag>"; + + # task; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <task>"; + $self->{options}{'_default_placeholder'} .= " <task>"; + + # taskprerequisites; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <taskprerequisites>"; + $self->{options}{'_default_break'} .= " <taskprerequisites>"; + + # taskrelated; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <taskrelated>"; + $self->{options}{'_default_break'} .= " <taskrelated>"; + + # tasksummary; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <tasksummary>"; + $self->{options}{'_default_break'} .= " <tasksummary>"; + + # tbody; does not contain text; + $self->{options}{'_default_untranslated'} .= " <tbody>"; + $self->{options}{'_default_break'} .= " <tbody>"; + + # td; contains text; + $self->{options}{'_default_translated'} .= " <td>"; + $self->{options}{'_default_break'} .= " <td>"; + + # term; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <term>"; + $self->{options}{'_default_break'} .= " <term>"; + + # termdef; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <termdef>"; + $self->{options}{'_default_inline'} .= " <termdef>"; + + # tertiary; contains text; Suppressed + $self->{options}{'_default_translated'} .= " <tertiary>"; + $self->{options}{'_default_placeholder'} .= " <tertiary>"; + + # tertiaryie; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <tertiaryie>"; + $self->{options}{'_default_break'} .= " <tertiaryie>"; + + # textdata; does not contain text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_untranslated'} .= " <textdata>"; + $self->{options}{'_default_break'} .= " <textdata>"; + $self->{options}{'_default_attributes'}.=' <textdata>fileref'; + + # textobject; does not contain text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_untranslated'} .= " <textobject>"; + $self->{options}{'_default_break'} .= " <textobject>"; + + # tfoot; does not contain text; + $self->{options}{'_default_untranslated'} .= " <tfoot>"; + $self->{options}{'_default_break'} .= " <tfoot>"; + + # tgroup; does not contain text; + $self->{options}{'_default_untranslated'} .= " <tgroup>"; + $self->{options}{'_default_break'} .= " <tgroup>"; + + # th; contains text; + $self->{options}{'_default_translated'} .= " <th>"; + 
$self->{options}{'_default_break'} .= " <th>"; + + # thead; does not contain text; + $self->{options}{'_default_untranslated'} .= " <thead>"; + $self->{options}{'_default_break'} .= " <thead>"; + + # tip; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <tip>"; + $self->{options}{'_default_break'} .= " <tip>"; + + # title; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <title>"; + $self->{options}{'_default_break'} .= " <title>"; + + # titleabbrev; contains text; Formatted inline or as a displayed block + # NOTE: could be in the inline class + $self->{options}{'_default_translated'} .= " <titleabbrev>"; + $self->{options}{'_default_break'} .= " <titleabbrev>"; + + # toc; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <toc>"; + $self->{options}{'_default_break'} .= " <toc>"; + + # tocback; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <tocback>"; + $self->{options}{'_default_break'} .= " <tocback>"; + + # tocchap; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <tocchap>"; + $self->{options}{'_default_break'} .= " <tocchap>"; + + # tocdiv; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <tocdiv>"; + $self->{options}{'_default_break'} .= " <tocdiv>"; + + # tocentry; contains text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <tocentry>"; + $self->{options}{'_default_break'} .= " <tocentry>"; + + # tocfront; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_translated'} .= " <tocfront>"; + $self->{options}{'_default_break'} .= " <tocfront>"; + + # toclevel1; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <toclevel1>"; + $self->{options}{'_default_break'} .= " <toclevel1>"; + + # toclevel2; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <toclevel2>"; + $self->{options}{'_default_break'} .= " <toclevel2>"; + + # toclevel3; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <toclevel3>"; + $self->{options}{'_default_break'} .= " <toclevel3>"; + + # toclevel4; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <toclevel4>"; + $self->{options}{'_default_break'} .= " <toclevel4>"; + + # toclevel5; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <toclevel5>"; + $self->{options}{'_default_break'} .= " <toclevel5>"; + + # tocpart; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <tocpart>"; + $self->{options}{'_default_break'} .= " <tocpart>"; + + # token; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <token>"; + $self->{options}{'_default_inline'} .= " <token>"; + + # tr; does not contain text; + $self->{options}{'_default_untranslated'} .= " <tr>"; + $self->{options}{'_default_break'} .= " <tr>"; + + # trademark; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <trademark>"; + $self->{options}{'_default_inline'} .= " <trademark>"; + + # type; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <type>"; + $self->{options}{'_default_inline'} .= " <type>"; + +# UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU + + # ulink; contains text; Formatted inline; v4, not in v5 + $self->{options}{'_default_translated'} .= " <ulink>"; + $self->{options}{'_default_inline'} .= " <ulink>"; + + # uri; contains text; Formatted inline + 
$self->{options}{'_default_translated'} .= " <uri>"; + $self->{options}{'_default_inline'} .= " <uri>"; + + # userinput; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <userinput>"; + $self->{options}{'_default_inline'} .= " <userinput>"; + +# VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV + + # varargs; empty element; + $self->{options}{'_default_untranslated'} .= " <varargs>"; + $self->{options}{'_default_inline'} .= " <varargs>"; + + # variablelist; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <variablelist>"; + $self->{options}{'_default_placeholder'} .= " <variablelist>"; + + # varlistentry; does not contain text; Formatted as a displayed block. + $self->{options}{'_default_untranslated'} .= " <varlistentry>"; + $self->{options}{'_default_break'} .= " <varlistentry>"; + + # varname; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <varname>"; + $self->{options}{'_default_inline'} .= " <varname>"; + + # videodata; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_untranslated'} .= " <videodata>"; + $self->{options}{'_default_break'} .= " <videodata>"; + $self->{options}{'_default_attributes'}.=' <videodata>fileref'; + + # videoobject; contains text; Formatted inline or as a displayed block + $self->{options}{'_default_untranslated'} .= " <videoobject>"; + $self->{options}{'_default_break'} .= " <videoobject>"; + + # void; empty element; + $self->{options}{'_default_untranslated'} .= " <void>"; + $self->{options}{'_default_inline'} .= " <void>"; + + # volumenum; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <volumenum>"; + $self->{options}{'_default_inline'} .= " <volumenum>"; + +# WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW + + # warning; does not contain text; Formatted as a displayed block. 
+ $self->{options}{'_default_untranslated'} .= " <warning>"; + $self->{options}{'_default_break'} .= " <warning>"; + + # wordasword; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <wordasword>"; + $self->{options}{'_default_inline'} .= " <wordasword>"; + +# XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX + + # xref; empty element; + $self->{options}{'_default_untranslated'} .= " <xref>"; + $self->{options}{'_default_inline'} .= " <xref>"; + +# YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + + # year; contains text; Formatted inline + $self->{options}{'_default_translated'} .= " <year>"; + $self->{options}{'_default_inline'} .= " <year>"; + +# ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ + + $self->{options}{'_default_attributes'}.=' + lang + xml:lang'; + + $self->treat_options; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/po4a/lib/Locale/Po4a/Po.pm Thu Mar 12 15:43:56 2009 +0800 @@ -0,0 +1,1580 @@ +# Locale::Po4a::Po -- manipulation of po files +# $Id: Po.pm,v 1.95 2009-02-28 22:18:39 nekral-guest Exp $ +# +# This program is free software; you may redistribute it and/or modify it +# under the terms of GPL (see COPYING). + +############################################################################ +# Modules and declarations +############################################################################ + +=head1 NAME + +Locale::Po4a::Po - po file manipulation module + +=head1 SYNOPSIS + + use Locale::Po4a::Po; + my $pofile=Locale::Po4a::Po->new(); + + # Read po file + $pofile->read('file.po'); + + # Add an entry + $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour', + 'flags' => "wrap", 'reference'=>'file.c:46'); + + # Extract a translation + $pofile->gettext("Hello"); # returns 'bonjour' + + # Write back to a file + $pofile->write('otherfile.po'); + +=head1 DESCRIPTION + +Locale::Po4a::Po is a module that allows you to manipulate message +catalogs. You can load and write from/to a file (whose extension is often +I<po>), you can build new entries on the fly or request the translation +of a string. + +For a more complete description of message catalogs in the po format and +their use, please refer to the documentation of the gettext program. + +This module is part of the PO4A project, whose objective is to use po files +(originally designed to ease the translation of program messages) to +translate everything, including documentation (man page, info manual), +package description, debconf templates, and everything which may benefit +from this. + +=head1 OPTIONS ACCEPTED BY THIS MODULE + +=over 4 + +=item porefs + +This specifies the reference format. It can be one of 'none' to not produce +any reference, 'noline' to not specify the line number, and 'full' to +include complete references.

=back

=cut

use IO::File;


require Exporter;

package Locale::Po4a::Po;
use DynaLoader;

use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext);

use subs qw(makespace);
use vars qw(@ISA @EXPORT_OK);
@ISA = qw(Exporter DynaLoader);
@EXPORT = qw(%debug);
@EXPORT_OK = qw(&move_po_if_needed);

use Locale::Po4a::TransTractor;
# Try to use a C extension if present.
eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION");

use 5.006;
use strict;
use warnings;

use Carp qw(croak);
use File::Path; # mkdir before write
use File::Copy; # move
use POSIX qw(strftime floor);
use Time::Local;

use Encode;

my @known_flags=qw(wrap no-wrap c-format fuzzy);

our %debug=('canonize' => 0,
            'quote'    => 0,
            'escape'   => 0,
            'encoding' => 0,
            'filter'   => 0);

=head1 Functions about whole message catalogs

=over 4

=item new()

Creates a new message catalog. The first argument, if any, is a reference
to a hash of options (see L</OPTIONS ACCEPTED BY THIS MODULE>). The second
argument, if provided and not empty, is the name of a po file which is
loaded into the new catalog.

=cut

sub new {
    my $this     = shift;
    my $options  = shift;
    my $filename = shift;

    # Works both as a class method and when called on an existing object.
    my $self = bless {}, (ref($this) || $this);
    $self->initialize($options);

    # Loading a file is optional: undef and the empty string both mean
    # "start with an empty catalog".
    if (defined($filename) && length($filename)) {
        $self->read($filename);
    }

    return $self;
}

# Return the numerical timezone (e.g. +0200)
# Neither the %z nor the %s formats of strftime are portable:
# '%s' is not supported on Solaris and '%z' indicates
# "2006-10-25 19:36E. Europe Standard Time" on MS Windows.
sub timezone {
    # Use a single snapshot of the clock, so that the local and the UTC
    # views describe the very same instant.
    my $now = time;
    my @l   = localtime($now);

    # Reading the local broken-down time as if it were UTC yields an epoch
    # value which is ahead of $now by exactly the local offset; daylight
    # saving time is already part of what localtime() returned.
    my $diff = floor((timegm(@l) - $now) / 60 + 0.5);   # minutes east of UTC

    # Split the magnitude, not the signed value: floor() and % both round
    # towards minus infinity, which turned -03:30 into "-0430".
    my $sign = ($diff < 0) ? '-' : '+';
    $diff = abs($diff);

    return sprintf "%s%02d%02d\n", $sign, floor($diff / 60), $diff % 60;
}

# Reset the object to an empty catalog.
#
# $options is an optional reference to a hash of options. Only the keys
# listed below are accepted (any other key with a true value is fatal), and
# only true values override the defaults. The default header and header
# comment are built from these options and from the current date.
sub initialize {
    my ($self, $options) = (shift, shift);
    my $date = strftime("%Y-%m-%d %H:%M", localtime).timezone();
    chomp $date;   # timezone() returns a newline-terminated string
#    $options = ref($options) || $options;

    $self->{options}{'porefs'}= 'full';
    $self->{options}{'msgid-bugs-address'}= undef;
    $self->{options}{'copyright-holder'}= "Free Software Foundation, Inc.";
    $self->{options}{'package-name'}= "PACKAGE";
    $self->{options}{'package-version'}= "VERSION";
    foreach my $opt (keys %$options) {
        if ($options->{$opt}) {
            die wrap_mod("po4a::po",
                         dgettext ("po4a", "Unknown option: %s"), $opt)
                unless exists $self->{options}{$opt};
            $self->{options}{$opt} = $options->{$opt};
        }
    }
    $self->{options}{'porefs'} =~ /^(full|noline|none)$/ ||
        die wrap_mod("po4a::po",
                     dgettext ("po4a",
                               "Invalid value for option 'porefs' ('%s' is ".
                               "not one of 'full', 'noline' or 'none')"),
                     $self->{options}{'porefs'});

    $self->{po}=();
    $self->{count}=0;  # number of msgids in the PO
    # count_doc: number of strings in the document
    # (duplicate strings counted multiple times)
    $self->{count_doc}=0;
    $self->{header_comment}=
        " SOME DESCRIPTIVE TITLE\n"
       ." Copyright (C) YEAR ".
        $self->{options}{'copyright-holder'}."\n"
       ." This file is distributed under the same license ".
        "as the ".$self->{options}{'package-name'}." package.\n"
       ." FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n"
       ."\n"
       .", fuzzy";
#    $self->header_tag="fuzzy";
    $self->{header}=escape_text("Project-Id-Version: ".
                                $self->{options}{'package-name'}." ".
                                $self->{options}{'package-version'}."\n".
                        ((defined $self->{options}{'msgid-bugs-address'})?
        "Report-Msgid-Bugs-To: ".$self->{options}{'msgid-bugs-address'}."\n":
                                "").
                                "POT-Creation-Date: $date\n".
                                "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n".
                                "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n".
                                "Language-Team: LANGUAGE <LL\@li.org>\n".
                                "MIME-Version: 1.0\n".
                                "Content-Type: text/plain; charset=CHARSET\n".
                                "Content-Transfer-Encoding: ENCODING");

    $self->{encoder}=find_encoding("ascii");

    # To make stats about gettext hits
    $self->stats_clear();
}

=item read($)

Reads a po file (whose name is given as argument, '-' meaning the standard
input). Previously existing entries in self are not removed, the new ones
are added to the end of the catalog.

=cut

sub read {
    my $self=shift;
    my $filename=shift
        or croak(wrap_mod("po4a::po",
                          dgettext("po4a",
                                   "Please provide a non-null filename")));

    my $fh;
    if ($filename eq '-') {
        $fh=*STDIN;
    } else {
        # Three-argument open: the file name is never parsed for a mode or
        # a pipe character.
        open($fh, '<', $filename)
            or croak(wrap_mod("po4a::po",
                              dgettext("po4a", "Can't read from %s: %s"),
                              $filename, $!));
    }

    ## Slurp the whole file; it is split into paragraphs below
    my $pofile = do { local $/; <$fh> };
    $pofile = "" unless defined $pofile;

    # The standard input is not ours to close
    if ($filename ne '-') {
        close($fh)
            or croak(wrap_mod("po4a::po",
                              dgettext("po4a",
                                       "Can't close %s after reading: %s"),
                              $filename, $!));
    }

    my $linenum=0;

    foreach my $msg (split (/\n\n/,$pofile)) {
        my ($msgid,$msgstr,$comment,$automatic,$reference,$flags,$buffer);
        my ($msgid_plural, $msgstr_plural);
        # Set once a msgstr[x] with x >= 2 was met: from there on, the text
        # belongs to a plural form that po4a drops.
        my $extra_forms = 0;
        foreach my $line (split (/\n/,$msg)) {
            $linenum++;
            if ($line =~ /^#\. ?(.*)$/) {  # Automatic comment
                $automatic .= (defined($automatic) ? "\n" : "").$1;

            } elsif ($line =~ /^#: ?(.*)$/) { # reference
                $reference .= (defined($reference) ? "\n" : "").$1;

            } elsif ($line =~ /^#, ?(.*)$/) { # flags
                $flags .= (defined($flags) ? "\n" : "").$1;

            } elsif ($line =~ /^#(.*)$/) {  # Translator comments
                $comment .= (defined($comment) ? "\n" : "").($1||"");

            } elsif ($line =~ /^msgid (".*")$/) { # begin of msgid
                $buffer = $1;

            } elsif ($line =~ /^msgid_plural (".*")$/) {
                # begin of msgid_plural, end of msgid

                $msgid = $buffer;
                $buffer = $1;

            } elsif ($line =~ /^msgstr (".*")$/) {
                # begin of msgstr, end of msgid

                $msgid = $buffer;
                $buffer = "$1";

            } elsif ($line =~ /^msgstr\[([0-9]+)\] (".*")$/) {
                # begin of msgstr[x], end of msgid_plural or msgstr[x-1]

                # Note: po4a cannot use plural forms
                # (no integer to use the plural form)
                #   * drop the msgstr[x] where x >= 2
                #   * use msgstr[0] as the translation of msgid
                #   * use msgstr[1] as the translation of msgid_plural

                my ($index, $text) = ($1, $2);
                if ($index eq "0") {
                    $msgid_plural = $buffer;
                    $buffer = $text;
                } elsif ($index eq "1") {
                    $msgstr = $buffer;
                    $buffer = $text;
                } elsif (!$extra_forms) {
                    # msgstr[1] is complete: save it now, so that the
                    # dropped forms cannot leak into it.
                    $msgstr_plural = $buffer;
                    $extra_forms = 1;
                    warn wrap_ref_mod("$filename:$linenum",
                                      "po4a::po",
                                      dgettext("po4a", "Messages with more than 2 plural forms are not supported."));
                }
            } elsif ($line =~ /^(".*")$/) {
                # continuation of a line (ignored inside a dropped form)
                $buffer .= "\n$1" unless $extra_forms;

            } else {
                warn wrap_ref_mod("$filename:$linenum",
                                  "po4a::po",
                                  dgettext("po4a", "Strange line: -->%s<--"),
                                  $line);
            }
        }
        $linenum++;   # the empty line separating two entries
        if (defined $msgid_plural) {
            $msgstr_plural=$buffer unless $extra_forms;

            $msgid = unquote_text($msgid) if (defined($msgid));
            $msgstr = unquote_text($msgstr) if (defined($msgstr));

            # A plural entry is stored as two catalog entries, marked with
            # 'plural' => 0 (singular) and 'plural' => 1 (plural).
            $self->push_raw ('msgid'     => $msgid,
                             'msgstr'    => $msgstr,
                             'reference' => $reference,
                             'flags'     => $flags,
                             'comment'   => $comment,
                             'automatic' => $automatic,
                             'plural'    => 0);

            $msgid_plural = unquote_text($msgid_plural)
                if (defined($msgid_plural));
            $msgstr_plural = unquote_text($msgstr_plural)
                if (defined($msgstr_plural));

            $self->push_raw ('msgid'     => $msgid_plural,
                             'msgstr'    => $msgstr_plural,
                             'reference' => $reference,
                             'flags'     => $flags,
                             'comment'   => $comment,
                             'automatic' => $automatic,
                             'plural'    => 1);
        } else {
            $msgstr=$buffer;

            $msgid = unquote_text($msgid) if (defined($msgid));
            $msgstr = unquote_text($msgstr) if (defined($msgstr));

            $self->push_raw ('msgid'     => $msgid,
                             'msgstr'    => $msgstr,
                             'reference' => $reference,
                             'flags'     => $flags,
                             'comment'   => $comment,
                             'automatic' => $automatic);
        }
    }
}

=item write($)

Writes the current catalog to the given file ('-' meaning the standard
output). The directory of the file is created if needed.

=cut

sub write{
    my $self=shift;
    my $filename=shift
        or croak(dgettext("po4a","Can't write to a file without filename")."\n");

    my $fh;
    if ($filename eq '-') {
        $fh=\*STDOUT;
    } else {
        # make sure the directory in which we should write the localized
        # file exists
        my $dir = $filename;
        if ($dir =~ m|/|) {
            $dir =~ s|/[^/]*$||;

            File::Path::mkpath($dir, 0, 0755) # Croaks on error
                if (length ($dir) && ! -e $dir);
        }
        open($fh, '>', $filename)
            or croak(wrap_mod("po4a::po",
                              dgettext("po4a", "Can't write to %s: %s"),
                              $filename, $!));
    }

    print $fh "".format_comment($self->{header_comment},"")
        if defined($self->{header_comment}) && length($self->{header_comment});

    print $fh "msgid \"\"\n";
    print $fh "msgstr ".quote_text($self->{header})."\n\n";

    # The charset does not change while writing: look it up only once.
    my $is_utf8 = ($self->get_charset =~ /^utf-8$/i) ? 1 : 0;

    # Format one 'keyword "text"' field. In an UTF-8 catalog the text is
    # decoded first, so that quote_text() works on characters, not on bytes.
    my $field = sub {
        my ($keyword, $text) = @_;
        return "$keyword ".quote_text($text)."\n"
            unless $is_utf8;
        return Encode::encode_utf8(
                   "$keyword ".quote_text(Encode::decode_utf8($text))."\n");
    };

    my $buf_msgstr_plural = ""; # Used to keep the first msgstr of plural forms
    my $first=1;
    foreach my $msgid ( sort { ($self->{po}{"$a"}{'pos'}) <=>
                               ($self->{po}{"$b"}{'pos'})
                             } keys %{$self->{po}}) {
        my $entry = $self->{po}{$msgid};
        my $output="";

        if ($first) {
            $first=0;
        } else {
            $output .= "\n";
        }

        $output .= format_comment($entry->{'comment'},"")
            if    defined($entry->{'comment'})
               && length ($entry->{'comment'});
        if (   defined($entry->{'automatic'})
            && length ($entry->{'automatic'})) {
            foreach my $comment (split(/\\n/,$entry->{'automatic'}))
            {
                $output .= format_comment($comment, ". ")
            }
        }
        $output .= format_comment($entry->{'type'},". type: ")
            if    defined($entry->{'type'})
               && length ($entry->{'type'});
        $output .= format_comment($entry->{'reference'},": ")
            if    defined($entry->{'reference'})
               && length ($entry->{'reference'});
        $output .= "#, ". join(", ", sort split(/\s+/,$entry->{'flags'}))."\n"
            if    defined($entry->{'flags'})
               && length ($entry->{'flags'});

        if (exists $entry->{'plural'}) {
            if ($entry->{'plural'} == 0) {
                # Singular half of a plural entry. It is appended to the
                # separator and comments gathered above, whatever the
                # charset is; msgstr[0] waits for the plural half.
                $output .= $field->("msgid", $msgid);
                $buf_msgstr_plural = $field->("msgstr[0]", $entry->{'msgstr'});
            } elsif ($entry->{'plural'} == 1) {
# TODO: there may be only one plural form
                # Plural half: it continues the entry started by the
                # singular half, so the comments gathered above are
                # dropped on purpose.
                $output  = $field->("msgid_plural", $msgid);
                $output .= $buf_msgstr_plural;
                $output .= $field->("msgstr[1]", $entry->{'msgstr'});
                $buf_msgstr_plural = "";
            } else {
                die wrap_msg(dgettext("po4a","Can't write PO files with more than two plural forms."));
            }
        } else {
            $output .= $field->("msgid", $msgid);
            $output .= $field->("msgstr", $entry->{'msgstr'});
        }

        print $fh $output;
    }

    # Buffered write errors (disk full, ...) only show up when closing.
    # The standard output is not ours to close.
    if ($filename ne '-') {
        close($fh)
            or croak(wrap_mod("po4a::po",
                              dgettext("po4a",
                                       "Can't close %s after writing: %s"),
                              $filename, $!));
    }
}

=item write_if_needed($$)

Like write, but if the PO or POT file already exists, the object will be
written in a temporary file which will be compared with the existing file
to check that the update is needed (this avoids changing a POT just to
update a line reference or the POT-Creation-Date field).

=cut

# Replace $old_po by $new_po, unless they only differ by their references
# and their creation/revision dates. In that case $new_po is removed and
# $old_po is only touched. When $backup is true, the replaced file is first
# copied to "$old_po~".
sub move_po_if_needed {
    my ($new_po, $old_po, $backup) = (shift, shift, shift);

    if (-e $old_po) {
        # The command goes through the shell: quote the file names, so that
        # spaces or metacharacters in them are harmless.
        my $quote = sub {
            my $arg = shift;
            $arg =~ s/'/'\\''/g;
            return "'$arg'";
        };
        my $diff_ignore = "-I'^#:' "
                         ."-I'^\"POT-Creation-Date:' "
                         ."-I'^\"PO-Revision-Date:'";
        my $command = join(' ', "diff -q", $diff_ignore, "--",
                                $quote->($old_po), $quote->($new_po));
        my $diff = qx($command);

        # Only trust an empty output when diff really ran and succeeded;
        # otherwise a missing or failing diff would throw the new file away.
        my $unchanged = (defined($diff) && $diff eq "" && $? == 0);

        if ($unchanged) {
            unlink $new_po
                or die wrap_msg(dgettext("po4a","Can't unlink %s: %s."),
                                $new_po, $!);
            # touch the old PO
            my ($atime, $mtime) = (time,time);
            utime $atime, $mtime, $old_po;
        } else {
            if ($backup) {
                copy($old_po, $old_po."~")
                    or die wrap_msg(dgettext("po4a","Can't copy %s to %s: %s."),
                                    $old_po, $old_po."~", $!);
            }
            move($new_po, $old_po)
                or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
                                $new_po, $old_po, $!);
        }
    } else {
        move($new_po, $old_po)
            or die wrap_msg(dgettext("po4a","Can't move %s to %s: %s."),
                            $new_po, $old_po, $!);
    }
}

sub write_if_needed {
    my $self=shift;
    my $filename=shift
        or croak(dgettext("po4a","Can't write to a file without filename")."\n");

    if (-e $filename) {
        require File::Basename;
        require File::Temp;

        # tempfile() is a plain function, not a method. Its template must
        # not contain any directory part when DIR is given, hence the
        # basename. OPEN => 0 only asks for a name: write() creates the file.
        my (undef, $tmp_filename) = File::Temp::tempfile(
            File::Basename::basename($filename)."XXXX",
            DIR    => "/tmp",
            OPEN   => 0,
            UNLINK => 0);
        $self->write($tmp_filename);
        move_po_if_needed($tmp_filename, $filename);
    } else {
        $self->write($filename);
    }
}

=item gettextize($$)

This function produces one
translated message catalog from two catalogs, an
original and a translation. This process is described in L<po4a(7)|po4a.7>,
section I<Gettextization: how does it work?>.

=cut

# Arguments: the catalog of the original document, then the catalog of the
# translated document. The entries are paired by their position in the
# documents; each pair becomes a fuzzy entry of the returned catalog.
# Dies when the structures differ or when the entry counts do not match.
sub gettextize {
    my $this = shift;
    my $class = ref($this) || $this;
    my ($poorig,$potrans)=(shift,shift);

    my $pores=Locale::Po4a::Po->new();

    my $please_fail = 0;
    my $toobad = dgettext("po4a",
        "\nThe gettextization failed (once again). Don't give up, ".
        "gettextizing is a subtle art, but this is only needed once ".
        "to convert a project to the gorgeous luxus offered by po4a ".
        "to translators.".
        "\nPlease refer to the po4a(7) documentation, the section ".
        "\"HOWTO convert a pre-existing translation to po4a?\" ".
        "contains several hints to help you in your task");

    # Neither catalog gains or loses entries below: count only once.
    my $count_orig  = $poorig->count_entries_doc();
    my $count_trans = $potrans->count_entries_doc();

    # Don't fail right now when the entry count does not match. Instead, give
    # it a try so that the user can see where we fail (which is probably where
    # the problem is).
    if ($count_orig > $count_trans) {
        warn wrap_mod("po4a gettextize", dgettext("po4a",
            "Original has more strings than the translation (%d>%d). ".
            "Please fix it by editing the translated version to add ".
            "some dummy entry."),
                      $count_orig,
                      $count_trans);
        $please_fail = 1;
    } elsif ($count_orig < $count_trans) {
        warn wrap_mod("po4a gettextize", dgettext("po4a",
            "Original has less strings than the translation (%d<%d). ".
            "Please fix it by removing the extra entry from the ".
            "translated file. You may need an addendum (cf po4a(7)) ".
            "to reput the chunk in place after gettextization. A ".
            "possible cause is that a text duplicated in the original ".
            "is not translated the same way each time. Remove one of ".
            "the translations, and you're fine."),
                      $count_orig,
                      $count_trans);
        $please_fail = 1;
    }

    if ( $poorig->get_charset =~ /^utf-8$/i ) {
        $potrans->to_utf8;
        $pores->set_charset("utf-8");
    } else {
        if ($potrans->get_charset eq "CHARSET") {
            $pores->set_charset("ascii");
        } else {
            $pores->set_charset($potrans->get_charset);
        }
    }
    print "Po character sets:\n".
        " original=".$poorig->get_charset."\n".
        " translated=".$potrans->get_charset."\n".
        " result=".$pores->get_charset."\n"
            if $debug{'encoding'};

    for (my ($o,$t)=(0,0) ;
         $o<$count_orig && $t<$count_trans;
         $o++,$t++) {
        #
        # Extract some informations

        my ($orig,$trans)=($poorig->msgid_doc($o),$potrans->msgid_doc($t));
#       print STDERR "Matches [[$orig]]<<$trans>>\n";

        my ($reforig,$reftrans)=($poorig->{po}{$orig}{'reference'},
                                 $potrans->{po}{$trans}{'reference'});
        my ($typeorig,$typetrans)=($poorig->{po}{$orig}{'type'},
                                   $potrans->{po}{$trans}{'type'});

        #
        # Make sure the type of both string exist
        # (a missing type is undef, which must not reach a string comparison)
        #
        die wrap_mod("po4a gettextize",
                     "Internal error: type of original string number %s ".
                     "isn't provided", $o)
            unless (defined($typeorig) && length($typeorig));

        die wrap_mod("po4a gettextize",
                     "Internal error: type of translated string number %s ".
                     "isn't provided", $o)
            unless (defined($typetrans) && length($typetrans));

        #
        # Make sure both type are the same
        #
        if ($typeorig ne $typetrans){
            $pores->write("gettextization.failed.po");
            die wrap_msg(dgettext("po4a",
                "po4a gettextization: Structure disparity between ".
                "original and translated files:\n".
                "msgid (at %s) is of type '%s' while\n".
                "msgstr (at %s) is of type '%s'.\n".
                "Original text: %s\n".
                "Translated text: %s\n".
                "(result so far dumped to gettextization.failed.po)").
                "%s",
                         $reforig, $typeorig,
                         $reftrans, $typetrans,
                         $orig,
                         $trans,
                         $toobad);
        }

        #
        # Push the entry
        #
        my $flags;
        if (defined $poorig->{po}{$orig}{'flags'}) {
            $flags = $poorig->{po}{$orig}{'flags'}." fuzzy";
        } else {
            $flags = "fuzzy";
        }
        $pores->push_raw('msgid'     => $orig,
                         'msgstr'    => $trans,
                         'flags'     => $flags,
                         'type'      => $typeorig,
                         'reference' => $reforig,
                         'conflict'  => 1,
                         'transref'  => $potrans->{po}{$trans}{'reference'})
            unless (defined($pores->{po}{$orig})
                    and ($pores->{po}{$orig}{'msgstr'} eq $trans));
        # FIXME: maybe we should be smarter about what reference should be
        #        sent to push_raw.
    }

    # make sure we return a useful error message when entry count differ
    die "$toobad\n" if $please_fail;

    return $pores;
}

=item filter($)

This function extracts a catalog from an existing one. Only the entries having
a reference in the given file will be placed in the resulting catalog.

This function parses its argument, converts it to a perl function definition,
eval this definition and filter the fields for which this function returns
true.

I love perl sometimes ;)

=cut

# NOTE(review): the argument of each field test is interpolated verbatim, as
# a regular expression, into the code given to eval. A '/' in it breaks the
# generated code, and a filter coming from an untrusted source could run
# arbitrary code. Only use trusted filters.
sub filter {
    my $self=shift;
    our $filter=shift;

    my $res;
    $res = Locale::Po4a::Po->new();

    # Parse the filter
    # (package variables, since the named subs below have to share them)
    our $code="sub apply { return ";
    our $pos=0;
    our $length = length $filter;

    # explode chars to parts. How to subscript a string in Perl?
    our @filter = split(//,$filter);

    # Die with a message showing where the parsing of the filter stopped.
    # The first argument is a sprintf-like format for the other ones.
    sub gloups {
        my ($fmt, @args) = @_;
        my $space = ' ' x $pos;
        # The filter is passed as an argument, not interpolated into the
        # format: a '%' in it must not be taken for a conversion.
        die wrap_msg("$fmt\n%s\n%s^ HERE", @args, $filter, $space);
    }
    # Debugging aid: show the filter, the current position and a message.
    sub showmethecode {
        return unless $debug{'filter'};
        my $fmt=shift;
        my $space = ' ' x $pos;
        print STDERR "$filter\n$space^ $fmt\n";#"$code\n";
    }

    # I dream of a lex in perl :-/
    # Parse one parenthesized expression starting at $pos, and append the
    # equivalent perl code to $code.
    sub parse_expression {
        showmethecode("Begin expression")
            if $debug{'filter'};

        gloups("Begin of expression expected, got '%s'",$filter[$pos])
            unless ($filter[$pos] eq '(');
        $pos ++; # pass the '('
        if ($filter[$pos] eq '&') {
            # AND
            $pos++;
            showmethecode("Begin of AND")
                if $debug{'filter'};
            $code .= "(";
            while (1) {
                gloups ("Unfinished AND statement.")
                    if ($pos == $length);
                parse_expression();
                if ($filter[$pos] eq '(') {
                    $code .= " && ";
                } elsif ($filter[$pos] eq ')') {
                    last; # do not eat that char
                } else {
                    gloups("End of AND or begin of sub-expression expected, got '%s'", $filter[$pos]);
                }
            }
            $code .= ")";
        } elsif ($filter[$pos] eq '|') {
            # OR
            $pos++;
            $code .= "(";
            while (1) {
                gloups("Unfinished OR statement.")
                    if ($pos == $length);
                parse_expression();
                if ($filter[$pos] eq '(') {
                    $code .= " || ";
                } elsif ($filter[$pos] eq ')') {
                    last; # do not eat that char
                } else {
                    gloups("End of OR or begin of sub-expression expected, got '%s'",$filter[$pos]);
                }
            }
            $code .= ")";
        } elsif ($filter[$pos] eq '!') {
            # NOT
            $pos++;
            $code .= "(!";
            gloups("Missing sub-expression in NOT statement.")
                if ($pos == $length);
            parse_expression();
            $code .= ")";
        } else {
            # must be an equal. Let's get field and argument
            my ($field,$arg,$done);
            $arg = '';
            $field = substr($filter,$pos);
            gloups("EQ statement contains no '=' or invalid field name")
                unless ($field =~ /([a-z]*)=/i);
            $field = lc($1);
            $pos += (length $field) + 1;

            # check that we've got a valid field name,
            # and the number it referes to
            # DO NOT CHANGE THE ORDER
            my @names=qw(msgid msgstr reference flags comment automatic);
            my $fieldpos;
            for ($fieldpos = 0;
                 $fieldpos < scalar @names && $field ne $names[$fieldpos];
                 $fieldpos++) {}
            gloups("Invalid field name: %s",$field)
                if $fieldpos == scalar @names; # not found

            # Now, get the argument value. It has to be between quotes,
            # which can be escaped
            # We point right on the first char of the argument
            # (first quote already eaten)
            my $escaped = 0;
            my $quoted = 0;
            if ($filter[$pos] eq '"') {
                $pos++;
                $quoted = 1;
            }
            showmethecode(($quoted?"Quoted":"Unquoted")." argument of field '$field'")
                if $debug{'filter'};

            while (!$done) {
                gloups("Unfinished EQ argument.")
                    if ($pos == $length);

                if ($quoted) {
                    if ($filter[$pos] eq '\\') {
                        if ($escaped) {
                            $arg .= '\\';
                            $escaped = 0;
                        } else {
                            $escaped = 1;
                        }
                    } elsif ($escaped) {
                        if ($filter[$pos] eq '"') {
                            $arg .= '"';
                            $escaped = 0;
                        } else {
                            gloups("Invalid escape sequence in argument: '\\%s'",$filter[$pos]);
                        }
                    } else {
                        if ($filter[$pos] eq '"') {
                            $done = 1;
                        } else {
                            $arg .= $filter[$pos];
                        }
                    }
                } else {
                    if ($filter[$pos] eq ')') {
                        # counter the next ++ since we don't want to eat
                        # this char
                        $pos--;
                        $done = 1;
                    } else {
                        $arg .= $filter[$pos];
                    }
                }
                $pos++;
            }
            # and now, add the code to check this equality
            $code .= "(\$_[$fieldpos] =~ m/$arg/)";

        }
        showmethecode("End of expression")
            if $debug{'filter'};
        gloups("Unfinished statement.")
            if ($pos == $length);
        gloups("End of expression expected, got '%s'",$filter[$pos])
            unless ($filter[$pos] eq ')');
        $pos++;
    }
    # And now, launch the beast, finish the function and use eval
    # to construct this function.
    # Ok, the lack of lexer is a fair price for the eval ;)
    parse_expression();
    gloups("Garbage at the end of the expression")
        if ($pos != $length);
    $code .= "; }";
    print STDERR "CODE = $code\n"
        if $debug{'filter'};
    eval $code;
    die wrap_mod("po4a::po", dgettext("po4a", "Eval failure: %s"), $@)
        if $@;

    for (my $cpt=(0) ;
         $cpt<$self->count_entries();
         $cpt++) {

        my ($msgid,$ref,$msgstr,$flags,$type,$comment,$automatic);

        $msgid = $self->msgid($cpt);
        $ref=$self->{po}{$msgid}{'reference'};

        $msgstr= $self->{po}{$msgid}{'msgstr'};
        $flags = $self->{po}{$msgid}{'flags'};
        $type = $self->{po}{$msgid}{'type'};
        $comment = $self->{po}{$msgid}{'comment'};
        $automatic = $self->{po}{$msgid}{'automatic'};

        # DO NOT CHANGE THE ORDER
        # (the generated code refers to the fields by their position)
        $res->push_raw('msgid'     => $msgid,
                       'msgstr'    => $msgstr,
                       'flags'     => $flags,
                       'type'      => $type,
                       'reference' => $ref,
                       'comment'   => $comment,
                       'automatic' => $automatic)
            if (apply($msgid,$msgstr,$ref,$flags,$comment,$automatic));
    }
    # delete the apply subroutine
    # otherwise it will be redefined.
    undef &apply;
    return $res;
}

=item to_utf8()

Recodes to utf-8 the po's msgstrs. Does nothing if the charset is not
specified in the po file ("CHARSET" value), or if it's already utf-8 or
ascii.

=cut

sub to_utf8 {
    my $this = shift;
    my $charset = $this->get_charset();

    unless ($charset eq "CHARSET" or
            $charset =~ /^ascii$/i or
            $charset =~ /^utf-8$/i) {
        # from_to() converts its first argument in place
        foreach my $msgid ( keys %{$this->{po}} ) {
            Encode::from_to($this->{po}{$msgid}{'msgstr'}, $charset, "utf-8");
        }
        $this->set_charset("utf-8");
    }
}

=back

=head1 Functions to use a message catalog for translations

=over 4

=item gettext($%)

Request the translation of the string given as argument in the current catalog.
The function returns the original (untranslated) string if the string was not
found.

After the string to translate, you can pass a hash of extra
arguments. Here are the valid entries:

=over

=item wrap

boolean indicating whether we can consider that whitespaces in string are
not important. If yes, the function canonizes the string before looking for
a translation, and wraps the result.

=item wrapcol

The column at which we should wrap (default: 76).

=back

=cut

sub gettext {
    my $self=shift;
    my $text=shift;
    my (%opt)=@_;
    my $res;

    return "" unless defined($text) && length($text); # Avoid returning the header.

    # Refuse the options we do not know about: they are most likely typos.
    my $validoption="reference wrap wrapcol";
    my %validoption = map { $_ => 1 } split(/ /,$validoption);
    foreach my $name (keys %opt) {
        Carp::confess("internal error: unknown arg $name.\n".
                      "Here are the valid options: $validoption.\n")
            unless $validoption{$name};
    }

    $text=canonize($text)
        if ($opt{'wrap'});

    # The catalog is indexed by the escaped form of the msgids
    my $esc_text=escape_text($text);

    $self->{gettextqueries}++;

    # Only a non-empty, non-fuzzy translation counts as a hit
    my $entry = $self->{po}{$esc_text};
    if (    defined $entry
        and defined $entry->{'msgstr'}
        and length $entry->{'msgstr'}
        and (   not defined $entry->{'flags'}
             or $entry->{'flags'} !~ /fuzzy/)) {

        $self->{gettexthits}++;
        $res = unescape_text($entry->{'msgstr'});
        if (defined $entry->{'plural'}) {
            if ($entry->{'plural'} eq "0") {
                warn wrap_mod("po4a gettextize", dgettext("po4a",
                    "'%s' is the singular form of a message, ".
                    "po4a will use the msgstr[0] translation (%s)."),
                              $esc_text, $res);
            } else {
                warn wrap_mod("po4a gettextize", dgettext("po4a",
                    "'%s' is the plural form of a message, ".
                    "po4a will use the msgstr[1] translation (%s)."),
                              $esc_text, $res);
            }
        }
    } else {
        $res = $text;
    }

    if ($opt{'wrap'}) {
        if ($self->get_charset =~ /^utf-8$/i) {
            # Wrap on characters, not on bytes
            $res=Encode::decode_utf8($res);
            $res=wrap ($res, $opt{'wrapcol'} || 76);
            $res=Encode::encode_utf8($res);
        } else {
            $res=wrap ($res, $opt{'wrapcol'} || 76);
        }
    }
#    print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n";
    return $res;
}

=item stats_get()

Returns statistics about the hit ratio of gettext since the last time that
stats_clear() was called. Please note that these are not the same
statistics as the ones printed by msgfmt --statistics. Here, the statistics
are about the recent usage of the po file, while msgfmt reports the status
of the file. Example of use:

  [some use of the po file to translate stuff]

  ($percent,$hit,$queries) = $pofile->stats_get();
  print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n";

=cut

# No prototype here: this is a method, it does receive an argument ($self).
sub stats_get {
    my $self=shift;
    my ($h,$q)=($self->{gettexthits},$self->{gettextqueries});

    # Percentage truncated to two decimals; 100% when nothing was asked yet
    my $p = ($q == 0 ? 100 : int($h/$q*10000)/100);

#    $p =~ s/\.00//;
#    $p =~ s/(\..)0/$1/;

    return ( $p,$h,$q );
}

=item stats_clear()

Clears the statistics about gettext hits.

=cut

sub stats_clear {
    my $self = shift;
    $self->{gettextqueries} = 0;
    $self->{gettexthits} = 0;
}

=back

=head1 Functions to build a message catalog

=over 4

=item push(%)

Push a new entry at the end of the current catalog. The arguments should
form a hash table. The valid keys are:

=over 4

=item msgid

the string in original language.

=item msgstr

the translation.

=item reference

an indication of where this string was found. Example: file.c:46 (meaning
in 'file.c' at line 46). It can be a space-separated list in case of
multiple occurrences.

=item comment

a comment added here manually (by the translators). The format here is free.

=item automatic

a comment which was automatically added by the string extraction
program. See the I<--add-comments> option of the B<xgettext> program for
more information.

=item flags

space-separated list of all defined flags for this entry.

Valid flags are: c-text, python-text, lisp-text, elisp-text, librep-text,
smalltalk-text, java-text, awk-text, object-pascal-text, ycp-text,
tcl-text, wrap, no-wrap and fuzzy.

See the gettext documentation for their meaning.

=item type

This is mostly an internal argument: it is used while gettextizing
documents. The idea here is to parse both the original and the translation
into a po object, and merge them, using one's msgid as msgid and the
other's msgid as msgstr. To make sure that things get ok, each msgid in po
objects is given a type, based on its structure (like "chapt", "sect1",
"p" and so on in docbook). If the types of strings are not the same, that
means that both files do not share the same structure, and the process
reports an error.

This information is written as automatic comment in the po file since this
gives to translators some context about the strings to translate.

=item wrap

boolean indicating whether whitespaces can be mangled in cosmetic
reformattings. If true, the string is canonized before use.

This information is written to the po file using the 'wrap' or 'no-wrap' flag.

=item wrapcol

The column at which we should wrap (default: 76).

This information is not written to the po file.

=back

=cut

# Takes plain (unescaped) text. Unknown keys are an internal error. When
# 'wrap' is false the entry receives the 'no-wrap' flag; when it is true,
# msgid and msgstr are canonized. Both strings are then escaped and the entry
# is handed over to push_raw(), whose return value is returned.
sub push {
    my $self  = shift;
    my %entry = @_;

    my $validoption = "wrap wrapcol type msgid msgstr automatic flags reference";
    my %validoption;
    $validoption{$_} = 1 foreach split(/ /, $validoption);

    foreach (keys %entry) {
        Carp::confess "internal error: unknown arg $_.\n".
                      "Here are the valid options: $validoption.\n"
            unless $validoption{$_};
    }

    unless ($entry{'wrap'}) {
        $entry{'flags'} .= " no-wrap";
    }
    foreach my $field ('msgid', 'msgstr') {
        next unless defined($entry{$field});
        $entry{$field} = canonize($entry{$field})
            if ($entry{'wrap'});
        $entry{$field} = escape_text($entry{$field});
    }

    $self->push_raw(%entry);
}

# The same as push(), but assuming that msgid and msgstr are already escaped.
#
# Besides the keys accepted by push(), this reads 'comment', 'transref',
# 'conflict' and 'plural'.
#  - An undefined msgid is ignored.
#  - An empty msgid defines the header (and selects the encoder).
#  - Otherwise the entry is stored in $self->{po}{$msgid}; pushing the same
#    msgid again accumulates references, document positions and flags.
sub push_raw {
    my $self  = shift;
    my %entry = @_;
    my ($msgid, $msgstr, $reference, $comment, $automatic, $flags, $type, $transref) =
        @entry{qw(msgid msgstr reference comment automatic flags type transref)};
    my $keep_conflict = $entry{'conflict'};

#    print STDERR "Push_raw\n";
#    print STDERR " msgid=>>>$msgid<<<\n" if $msgid;
#    print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr;
#    Carp::cluck " flags=$flags\n" if $flags;

    return unless defined($msgid);

    # no msgid => header definition
    unless (length($msgid)) {
#        if (defined($self->{header}) && $self->{header} =~ /\S/) {
#            warn dgettext("po4a","Redefinition of the header. ".
#                                 "The old one will be discarded\n");
#        } FIXME: do that iff the header isn't the default one.
        $self->{header}         = $msgstr;
        $self->{header_comment} = $comment;
        my $charset = $self->get_charset;
        if ($charset ne "CHARSET") {
            $self->{encoder} = find_encoding($charset);
        } else {
            $self->{encoder} = find_encoding("ascii");
        }
        return;
    }

    # Neither the option nor the reference is guaranteed to be set: compare
    # and rewrite them only when they are defined.
    my $porefs = $self->{options}{'porefs'};
    $porefs = '' unless defined $porefs;
    if ($porefs eq "none") {
        $reference = "";
    } elsif ($porefs eq "noline" && defined $reference) {
        $reference =~ s/:[0-9]*/:1/g;
    }

    if (defined($self->{po}{$msgid})) {
        warn wrap_mod("po4a::po",
                      dgettext("po4a","msgid defined twice: %s"),
                      $msgid)
            if (0); # FIXME: put a verbose stuff
        if (    defined $msgstr
            and defined $self->{po}{$msgid}{'msgstr'}
            and $self->{po}{$msgid}{'msgstr'} ne $msgstr) {

            if ($keep_conflict) {
                if ($self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s) {
                    # The stored translation already carries conflict
                    # markers: append the new candidate.
                    $msgstr = $self->{po}{$msgid}{'msgstr'}.
                              "\\n#-#-#-#-# $transref #-#-#-#-#\\n".
                              $msgstr;
                } else {
                    $msgstr = "#-#-#-#-# ".
                              $self->{po}{$msgid}{'transref'}.
                              " #-#-#-#-#\\n".
                              $self->{po}{$msgid}{'msgstr'}."\\n".
                              "#-#-#-#-# $transref #-#-#-#-#\\n".
                              $msgstr;
                }
                # Every msgid will have the same list of references.
                # Only keep the last list.
                $self->{po}{$msgid}{'reference'} = "";
            } else {
                # format_comment() takes (comment, char). Passing the
                # reference as the char would interpolate it into a regex.
                my $old_reference = $self->{po}{$msgid}{'reference'};
                my $txt    = quote_text($msgid);
                my $first  = format_comment(defined $old_reference ? $old_reference : "", ". ").
                             quote_text($self->{po}{$msgid}{'msgstr'});
                my $second = format_comment(defined $reference ? $reference : "", ". ").
                             quote_text($msgstr);

                warn wrap_msg(dgettext("po4a",
                                       "Translations don't match for:\n".
                                       "%s\n".
                                       "-->First translation:\n".
                                       "%s\n".
                                       " Second translation:\n".
                                       "%s\n".
                                       " Old translation discarded."),
                              $txt,$first,$second);
            }
        }
    }
    if (defined $transref) {
        $self->{po}{$msgid}{'transref'} = $transref;
    }
    if (defined $reference) {
        if (defined $self->{po}{$msgid}{'reference'}) {
            $self->{po}{$msgid}{'reference'} .= " ".$reference;
        } else {
            $self->{po}{$msgid}{'reference'} = $reference;
        }
    }
    $self->{po}{$msgid}{'msgstr'}    = $msgstr;
    $self->{po}{$msgid}{'comment'}   = $comment;
    $self->{po}{$msgid}{'automatic'} = $automatic;

    # pos_doc lists every position of the string in the document, while pos
    # is the position of its first appearance in the catalog.
    if (defined($self->{po}{$msgid}{'pos_doc'})) {
        $self->{po}{$msgid}{'pos_doc'} .= " ".$self->{count_doc}++;
    } else {
        $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++;
    }
    unless (defined($self->{po}{$msgid}{'pos'})) {
        $self->{po}{$msgid}{'pos'} = $self->{count}++;
    }
    $self->{po}{$msgid}{'type'}   = $type;
    $self->{po}{$msgid}{'plural'} = $entry{'plural'}
        if defined $entry{'plural'};

    if (defined($flags)) {
        $flags = " $flags ";
        $flags =~ s/,/ /g;
        foreach my $flag (@known_flags) {
            next unless ($flags =~ /\s$flag\s/);      # flag not requested
            next if (   defined($self->{po}{$msgid}{'flags'})
                     && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/); # already set
            if (defined $self->{po}{$msgid}{'flags'}) {
                $self->{po}{$msgid}{'flags'} .= " ".$flag;
            } else {
                $self->{po}{$msgid}{'flags'} = $flag;
            }
        }
    }
#    print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n";

}

=back

=head1 Miscellaneous functions

=over 4

=item count_entries()

Returns the number of entries in the catalog (without the header).

=cut

sub count_entries {
    my $self = shift;
    return $self->{count};
}

=item count_entries_doc()

Returns the number of entries in document. If a string appears multiple times
in the document, it will be counted multiple times.

=cut

sub count_entries_doc {
    my $self = shift;
    return $self->{count_doc};
}

=item msgid($)

Returns the msgid of the given number.

=pod

=cut

# Returns the msgid whose catalog position ('pos') is $num, or undef when no
# entry has that position. The catalog is scanned linearly.
sub msgid {
    my $self = shift;
    my $num  = shift;

    foreach my $msgid ( keys %{$self->{po}} ) {
        return $msgid if ($self->{po}{$msgid}{'pos'} eq $num);
    }
    return undef;
}

=item msgid_doc($)

Returns the msgid with the given position in the document.

=cut

# 'pos_doc' is a space-separated list, since a string can appear several
# times in the document. Returns undef when no entry holds that position.
sub msgid_doc {
    my $self = shift;
    my $num  = shift;

    foreach my $msgid ( keys %{$self->{po}} ) {
        foreach my $pos (split / /, $self->{po}{$msgid}{'pos_doc'}) {
            return $msgid if ($pos eq $num);
        }
    }
    return undef;
}

=item get_charset()

Returns the character set specified in the po header. If it hasn't been
set, it will return "CHARSET".

=cut

sub get_charset {
    my $self = shift;

    # Test the match itself rather than $1: after a failed match, $1 still
    # holds whatever an earlier successful match (possibly one done by the
    # caller) captured.
    if (    defined $self->{header}
        and $self->{header} =~ /charset=(.*?)[\s\\]/) {
        return $1;
    }
    return "CHARSET";
}

=item set_charset($)

This sets the character set of the po header to the value specified in its
first argument. If you never call this function (and no file with a specified
character set is read), the default value is left to "CHARSET". This value
doesn't change the behavior of this module, it's just used to fill that field
in the header, and to return it in get_charset().

=cut

sub set_charset {
    my $self    = shift;
    my $newchar = shift;
    my $oldchar = $self->get_charset();

    # Only rewrite the value of the charset= field. The old name is quoted so
    # that it is matched literally, and anchored so that the same text
    # elsewhere in the header is left alone.
    $self->{header} =~ s/(charset=)\Q$oldchar\E/$1$newchar/;
    $self->{encoder} = find_encoding($newchar);
}

#----[ helper functions ]---------------------------------------------------

# transform the string from its po file representation to the form which
# should be used to print it
sub unescape_text {
    my $text = shift;

    print STDERR "\nunescape [$text]====" if $debug{'escape'};
    $text = join("",split(/\n/,$text));
    $text =~ s/\\"/"/g;
    # unescape newlines
    #   NOTE on \G:
    #   The following regular expression introduce newlines.
    #   Thus, ^ doesn't match all beginnings of lines.
    #   \G is a zero-width assertion that matches the position
    #   of the previous substitution with s///g. As every
    #   substitution ends by a newline, it always matches a
    #   position just after a newline.
    $text =~ s/(           # $1:
                (\G|[^\\]) #    beginning of the line or any char
                           #    different from '\'
                (\\\\)*    #    followed by any even number of '\'
               )\\n        # and followed by an escaped newline
              /$1\n/sgx;   # single string, match globally, allow comments
    # unescape tabulations
    $text =~ s/(          # $1:
                (\G|[^\\])#    beginning of the line or any char
                          #    different from '\'
                (\\\\)*   #    followed by any even number of '\'
               )\\t       # and followed by an escaped tabulation
              /$1\t/mgx;  # multilines string, match globally, allow comments
    # and unescape the escape character
    $text =~ s/\\\\/\\/g;
    print STDERR ">$text<\n" if $debug{'escape'};

    return $text;
}

# transform the string to its representation as it should be written in po
# files
sub escape_text {
    my $text = shift;

    print STDERR "\nescape [$text]====" if $debug{'escape'};
    # The backslash goes first, so that the backslashes introduced by the
    # next substitutions are not doubled.
    $text =~ s/\\/\\\\/g;
    $text =~ s/"/\\"/g;
    $text =~ s/\n/\\n/g;
    $text =~ s/\t/\\t/g;
    print STDERR ">$text<\n" if $debug{'escape'};

    return $text;
}

# put quotes around the string on each lines (without escaping it)
# It does also normalize the text (ie, make sure its representation is wrapped
# on the 80th char, but without changing the meaning of the string)
sub quote_text {
    my $string = shift;

    return '""' unless defined($string) && length($string);

    print STDERR "\nquote [$string]====" if $debug{'quote'};
    # break lines on newlines, if any
    # see unescape_text for an explanation on \G
    $string =~ s/(           # $1:
                  (\G|[^\\]) #    beginning of the line or any char
                             #    different from '\'
                  (\\\\)*    #    followed by any even number of '\'
                 \\n)        # and followed by an escaped newline
                /$1\n/sgx;   # single string, match globally, allow comments
    $string = wrap($string);
    my @string = split(/\n/,$string);
    $string = join ("\"\n\"",@string);
    $string = "\"$string\"";
    # A multi-line value starts with an empty string on the first line.
    if (scalar @string > 1 && $string[0] ne '') {
        $string = "\"\"\n".$string;
    }

    print STDERR ">$string<\n" if $debug{'quote'};
    return $string;
}

# undo the work of the quote_text function
sub unquote_text {
    my $string = shift;
    print STDERR "\nunquote [$string]====" if $debug{'quote'};
    $string =~ s/^""\\n//s;
    $string =~ s/^"(.*)"$/$1/s;
    $string =~ s/"\n"//gm;
    # Note: an even number of '\' could precede \\n, but I could not build a
    # document to test this
    $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm;
    $string =~ s|!!DUMMYPOPM!!|\\n|gm;
    print STDERR ">$string<\n" if $debug{'quote'};
    return $string;
}

# canonize the string: write it on only one line, changing consecutive
# whitespace to only one space (two after a '.' or a ')').
# Warning, it changes the string and should only be called if the string is
# plain text
#
# NOTE(review): the patterns below rely on runs of TWO spaces. A copy of this
# file in which whitespace was squeezed shows single spaces, which would put
# a space after every character; confirm against the pristine file.
sub canonize {
    my $text = shift;
    print STDERR "\ncanonize [$text]====" if $debug{'canonize'};
    $text =~ s/^ *//s;
    $text =~ s/^[ \t]+/  /gm;
    # if ($text eq "\n"), it messed up the first string (header)
    $text =~ s/\n/  /gm if ($text ne "\n");
    $text =~ s/([.)])  +/$1  /gm;
    $text =~ s/([^.)])  */$1 /gm;
    $text =~ s/ *$//s;
    print STDERR ">$text<\n" if $debug{'canonize'};
    return $text;
}

# wraps the string. We don't use Text::Wrap since it mangles whitespace at
# the end of split lines.
#
# Arguments: the text, and optionally the column (default: 76). Lines longer
# than the column are broken after the last space found within the column
# (or after the first space of the line when there is none), avoiding a break
# right after a '.'. Trailing whitespace of the input is restored unchanged.
sub wrap {
    my $text = shift;
    return "0" if ($text eq '0');
    my $col = shift || 76;
    my @lines = split(/\n/,"$text");
    my $res = "";
    my $first = 1;
    while (defined(my $line = shift @lines)) {
        if ($first && length($line) > $col - 10) {
            unshift @lines,$line;
            $first = 0;
            next;
        }
        if (length($line) > $col) {
            my $pos = rindex($line," ",$col);
            # Do not break right after a full stop. The position is checked
            # first: with a space at column 0, substr() would wrap around to
            # the last character and rindex() would keep answering 0, which
            # never terminates when the line ends with a '.'.
            while ($pos > 0 && substr($line,$pos-1,1) eq '.') {
                $pos = rindex($line," ",$pos-1);
            }
            if ($pos == -1) {
                # There are no spaces in the first $col chars, pick-up the
                # first space
                $pos = index($line," ");
            }
            if ($pos != -1) {
                my $end = substr($line,$pos+1);
                $line = substr($line,0,$pos+1);
                # Keep the whole run of spaces at the end of the broken line.
                if ($end =~ s/^( +)//) {
                    $line .= $1;
                }
                unshift @lines,$end;
            }
        }
        $first = 0;
        $res .= "$line\n";
    }
    # Restore the original trailing spaces
    $res =~ s/\s+$//s;
    if ($text =~ m/(\s+)$/s) {
        $res .= $1;
    }
    return $res;
}

# outputs properly a '# ... ' line to be put in the po file
#
# Arguments: the comment text, then the string to put right after the '#' on
# each line (for instance ". " for automatic comments). Lines which would
# otherwise be empty are reduced to a bare '#'.
sub format_comment {
    my $comment = shift;
    my $char    = shift;
    my $result  = "#". $char . $comment;
    $result =~ s/\n/\n#$char/gs;
    # $char is literal text (". " contains a regex metacharacter): quote it.
    $result =~ s/^#\Q$char\E$/#/gm;
    $result .= "\n";
    return $result;
}


1;
__END__

=back

=head1 AUTHORS

 Denis Barbier <barbier@linuxfr.org>
 Martin Quinson (mquinson#debian.org)

=cut
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/po4a/lib/Locale/Po4a/TransTractor.pm Thu Mar 12 15:43:56 2009 +0800 @@ -0,0 +1,1100 @@ +#!/usr/bin/perl -w + +require Exporter; + +package Locale::Po4a::TransTractor; +use DynaLoader; + +use 5.006; +use strict; +use warnings; + +use subs qw(makespace); +use vars qw($VERSION @ISA @EXPORT); +$VERSION="0.36"; +@ISA = qw(DynaLoader); +@EXPORT = qw(new process translate + read write readpo writepo + getpoout setpoout); + +# Try to use a C extension if present. +eval("bootstrap Locale::Po4a::TransTractor $VERSION"); + +use Carp qw(croak); +use Locale::Po4a::Po; +use Locale::Po4a::Common; + +use File::Path; # mkdir before write + +use Encode; +use Encode::Guess; + +=head1 NAME + +Locale::Po4a::TransTractor - Generic trans(lator ex)tractor. + +=head1 DESCRIPTION + +The po4a (po for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +This class is the ancestor of every po4a parsers used to parse a document to +search translatable strings, extract them to a po file and replace them by +their translation in the output document. + +More formally, it takes the following arguments as input: + +=over 2 + +=item - + +a document to translate ; + +=item - + +a po file containing the translations to use. + +=back + +As output, it produces: + +=over 2 + +=item - + +another po file, resulting of the extraction of translatable strings from +the input document ; + +=item - + +a translated document, with the same structure than the one in input, but +with all translatable strings replaced with the translations found in the +po file provided in input. 
+ +=back + +Here is a graphical representation of this: + + Input document --\ /---> Output document + \ / (translated) + +-> parse() function -----+ + / \ + Input po --------/ \---> Output po + (extracted) + +=head1 FUNCTIONS YOUR PARSER SHOULD OVERRIDE + +=over 4 + +=item parse() + +This is where all the work takes place: the parsing of input documents, the +generation of output, and the extraction of the translatable strings. This +is pretty simple using the provided functions presented in the section +"INTERNAL FUNCTIONS" below. See also the synopsis, which present an +example. + +This function is called by the process() function bellow, but if you choose +to use the new() function, and to add content manually to your document, +you will have to call this function yourself. + +=item docheader() + +This function returns the header we should add to the produced document, +quoted properly to be a comment in the target language. See the section +"Educating developers about translations", from L<po4a(7)|po4a.7>, for what +it is good for. + +=back + +=cut + +sub docheader {} + +sub parse {} + +=head1 SYNOPSIS + +The following example parses a list of paragraphs beginning with "<p>". For the sake +of simplicity, we assume that the document is well formatted, i.e. that '<p>' +tags are the only tags present, and that this tag is at the very beginning +of each paragraph. + + sub parse { + my $self = shift; + + PARAGRAPH: while (1) { + my ($paragraph,$pararef)=("",""); + my $first=1; + my ($line,$lref)=$self->shiftline(); + while (defined($line)) { + if ($line =~ m/<p>/ && !$first--; ) { + # Not the first time we see <p>. + # Reput the current line in input, + # and put the built paragraph to output + $self->unshiftline($line,$lref); + + # Now that the document is formed, translate it: + # - Remove the leading tag + $paragraph =~ s/^<p>//s; + + # - push to output the leading tag (untranslated) and the + # rest of the paragraph (translated) + $self->pushline( "<p>" + . 
$document->translate($paragraph,$pararef) + ); + + next PARAGRAPH; + } else { + # Append to the paragraph + $paragraph .= $line; + $pararef = $lref unless(length($pararef)); + } + + # Reinit the loop + ($line,$lref)=$self->shiftline(); + } + # Did not get a defined line? End of input file. + return; + } + } + +Once you've implemented the parse function, you can use your document +class, using the public interface presented in the next section. + +=head1 PUBLIC INTERFACE for scripts using your parser + +=head2 Constructor + +=over 4 + +=item process(%) + +This function can do all you need to do with a po4a document in one +invocation. Its arguments must be packed as a hash. ACTIONS: + +=over 3 + +=item a. + +Reads all the po files specified in po_in_name + +=item b. + +Reads all original documents specified in file_in_name + +=item c. + +Parses the document + +=item d. + +Reads and applies all the addenda specified + +=item e. + +Writes the translated document to file_out_name (if given) + +=item f. + +Writes the extracted po file to po_out_name (if given) + +=back + +ARGUMENTS, beside the ones accepted by new() (with expected type): + +=over 4 + +=item file_in_name (@) + +List of filenames where we should read the input document. + +=item file_in_charset ($) + +Charset used in the input document (if it isn't specified, it will try +to detect it from the input document). + +=item file_out_name ($) + +Filename where we should write the output document. + +=item file_out_charset ($) + +Charset used in the output document (if it isn't specified, it will use +the po file charset). + +=item po_in_name (@) + +List of filenames where we should read the input po files from, containing +the translation which will be used to translate the document. + +=item po_out_name ($) + +Filename where we should write the output po file, containing the strings +extracted from the input document. + +=item addendum (@) + +List of filenames where we should read the addenda from. 

=item addendum_charset ($)

Charset for the addenda.

=back

=item new(%)

Create a new Po4a document. Accepted options (must be in a hash):

=over 4

=item verbose ($)

Sets the verbosity.

=item debug ($)

Sets the debugging.

=back

=cut

# Runs the whole chain on an existing object: read the input po files and
# documents, parse, apply the addenda, then write the translated document and
# the extracted po file when their names are given.
# Dies if an addendum cannot be applied. Returns the object.
sub process {
    ## This is an instance method: the object state ($self->{TT}) is used
    ## right away, so it has to be called on an object built by new().
    my $self = shift;

    ## The remaining arguments are the named parameters documented above.
    my %params = @_;

    $self->detected_charset($params{'file_in_charset'});
    $self->{TT}{'file_out_charset'} = $params{'file_out_charset'};
    if (defined($self->{TT}{'file_out_charset'}) and
        length($self->{TT}{'file_out_charset'})) {
        $self->{TT}{'file_out_encoder'} = find_encoding($self->{TT}{'file_out_charset'});
    }
    $self->{TT}{'addendum_charset'} = $params{'addendum_charset'};

    foreach my $file (@{ $params{'po_in_name'} || [] }) {
        print STDERR "readpo($file)... " if $self->debug();
        $self->readpo($file);
        print STDERR "done.\n" if $self->debug();
    }
    foreach my $file (@{ $params{'file_in_name'} || [] }) {
        print STDERR "read($file)..." if $self->debug();
        $self->read($file);
        print STDERR "done.\n" if $self->debug();
    }
    print STDERR "parse..." if $self->debug();
    $self->parse();
    print STDERR "done.\n" if $self->debug();
    foreach my $file (@{ $params{'addendum'} || [] }) {
        print STDERR "addendum($file)..." if $self->debug();
        $self->addendum($file) || die "An addendum failed\n";
        print STDERR "done.\n" if $self->debug();
    }
    if (defined $params{'file_out_name'}) {
        print STDERR "write(".$params{'file_out_name'}.")... "
            if $self->debug();
        $self->write($params{'file_out_name'});
        print STDERR "done.\n" if $self->debug();
    }
    if (defined $params{'po_out_name'}) {
        print STDERR "writepo(".$params{'po_out_name'}.")... "
            if $self->debug();
        $self->writepo($params{'po_out_name'});
        print STDERR "done.\n" if $self->debug();
    }
    return $self;
}

# Constructor. Lets the parser handle its options through initialize(), then
# sets up the private data kept under $self->{TT}.
sub new {
    ## Determine if we were called via an object-ref or a classname
    my $this  = shift;
    my $class = ref($this) || $this;
    my $self  = { };
    my %options = @_;
    ## Bless ourselves into the desired class and perform any initialization
    bless $self, $class;

    ## initialize the plugin
    # prevent the plugin from croaking on the options intended for Po.pm
    $self->{options}{'porefs'} = '';
    # let the plugin parse the options and such
    $self->initialize(%options);

    ## Create our private data
    my %po_options;
    $po_options{'porefs'} = $self->{options}{'porefs'};

    # private data (anything stored under TT before this point is discarded)
    $self->{TT} = {};
    $self->{TT}{po_in}  = Locale::Po4a::Po->new();
    $self->{TT}{po_out} = Locale::Po4a::Po->new(\%po_options);
    # doc_in is a flat list in which each line of the document is followed
    # by its reference: read() pushes such pairs and shiftline() shifts them
    # two at a time. doc_out is the list of output lines.
    # Both start as empty arrays so that they can be dereferenced right away.
    $self->{TT}{doc_in}  = [];
    $self->{TT}{doc_out} = [];
    if (defined $options{'verbose'}) {
        $self->{TT}{verbose} = $options{'verbose'};
    }
    if (defined $options{'debug'}) {
        $self->{TT}{debug} = $options{'debug'};
    }
    # Input document is in ascii until we prove the opposite (in read())
    $self->{TT}{ascii_input} = 1;
    # We try not to use utf unless it's forced from the outside (in case the
    # document isn't in ascii)
    $self->{TT}{utf_mode} = 0;

    return $self;
}

=back

=head2 Manipulating document files

=over 4

=item read($)

Add another input document at the end of the existing one. The argument is
the filename to read.

Please note that it does not parse anything.
=pod

You should use the parse() function when you're done with packing input
files into the document.

=cut

#'
# Appends every line of $filename, each followed by its "file:line"
# reference, to doc_in. As long as no input charset is known, each line is
# also checked for non-ascii content; the first offending reference is kept
# for error messages. '-' stands for the standard input.
# Croaks when the file cannot be opened or closed. Returns 1.
sub read {
    my $self = shift;
    my $filename = shift
        or croak(wrap_msg(dgettext("po4a", "Can't read from file without having a filename")));
    my $linenum = 0;

    my $in;
    if ($filename eq '-') {
        $in = \*STDIN;
    } else {
        # Three-argument open: the name is never interpreted as a mode, a
        # pipe or a dup, and surrounding whitespace is kept.
        open($in, '<', $filename)
            or croak(wrap_msg(dgettext("po4a", "Can't read from %s: %s"), $filename, $!));
    }
    while (defined(my $textline = <$in>)) {
        $linenum++;
        my $ref = "$filename:$linenum";
        push @{$self->{TT}{doc_in}}, $textline, $ref;

        next if defined($self->{TT}{'file_in_charset'});
        # Detect if this file has non-ascii characters
        next unless $self->{TT}{ascii_input};
        my $decoder = guess_encoding($textline);
        if (!ref($decoder) or $decoder !~ /Encode::XS=/) {
            # We have detected a non-ascii line
            $self->{TT}{ascii_input} = 0;
            # Save the reference for future error message
            $self->{TT}{non_ascii_ref} ||= $ref;
        }
    }
    if ($filename ne '-') {
        close($in)
            or croak(wrap_msg(dgettext("po4a", "Can't close %s after reading: %s"), $filename, $!));
    }
    return 1;
}

=item write($)

Write the translated document to the given filename.

=cut

# Writes the document header (docheader()) followed by doc_out. '-' stands
# for the standard output; otherwise the directory of the file is created
# when needed. Croaks when the file cannot be opened or closed.
sub write {
    my $self = shift;
    my $filename = shift
        or croak(wrap_msg(dgettext("po4a", "Can't write to a file without filename")));

    my $fh;
    if ($filename eq '-') {
        $fh = \*STDOUT;
    } else {
        # make sure the directory in which we should write the localized file exists
        my $dir = $filename;
        if ($dir =~ m|/|) {
            $dir =~ s|/[^/]*$||;

            File::Path::mkpath($dir, 0, 0755) # Croaks on error
                if (length ($dir) && ! -e $dir);
        }
        open($fh, '>', $filename)
            or croak(wrap_msg(dgettext("po4a", "Can't write to %s: %s"), $filename, $!));
    }

    print {$fh} $_ foreach $self->docheader();
    print {$fh} $_ foreach @{$self->{TT}{doc_out}};

    if ($filename ne '-') {
        close $fh or croak(wrap_msg(dgettext("po4a", "Can't close %s after writing: %s"), $filename, $!));
    }

}

=back

=head2 Manipulating po files

=over 4

=item readpo($)

Add the content of a file (which name is passed in argument) to the
existing input po. The old content is not discarded.

=item writepo($)

Write the extracted po file to the given filename.

=item stats()

Returns some statistics about the translation done so far. Please note that
it's not the same statistics than the one printed by msgfmt
--statistic. Here, it's stats about recent usage of the po file, while
msgfmt reports the status of the file. It is a wrapper to the
Locale::Po4a::Po::stats_get function applied to the input po file. Example
of use:

    [normal use of the po4a document...]

    ($percent,$hit,$queries) = $document->stats();
    print "We found translations for $percent\% ($hit from $queries) of strings.\n";

=back

=cut

# Accessors for the output po object, and thin wrappers around the po
# objects held under $self->{TT}.
sub getpoout {
    my ($self) = @_;
    return $self->{TT}{po_out};
}
sub setpoout {
    my ($self, $po) = @_;
    $self->{TT}{po_out} = $po;
}
sub readpo {
    my ($self, $filename) = @_;
    $self->{TT}{po_in}->read($filename);
}
sub writepo {
    my ($self, $filename) = @_;
    $self->{TT}{po_out}->write( $filename );
}
sub stats {
    my ($self) = @_;
    return $self->{TT}{po_in}->stats_get();
}

=head2 Manipulating addenda

=over 4

=item addendum($)

Please refer to L<po4a(7)|po4a.7> for more information on what addenda are,
and how translators should write them. To apply an addendum to the translated
document, simply pass its filename to this function and you are done ;)

This function returns a non-null integer on success, and 0 on error.

=cut

# Internal function to read the header.
# Reads the addendum $filename: its first line is the Po4a header
# ("PO4A-HEADER:" followed by ';'-separated key=value pairs), the rest is the
# content to insert. Known keys are mode, position, endboundary and
# beginboundary.
#
# Returns ($errcode,$mode,$position,$boundary,$bmode,$content), where
# $errcode is 0 on success and 1 on error (a warning is then emitted).
sub addendum_parse {
    my ($filename) = @_;

    my ($errcode,$mode,$position,$boundary,$bmode,$content)=
        (1,"","","","","");

    my $ins;
    my $opened = open($ins, '<', $filename);

    # Every failure leaves this block early, so that the file is closed in
    # one place whatever happened.
  PARSE: {
        unless ($opened) {
            warn wrap_msg(dgettext("po4a", "Can't read from %s: %s"), $filename, $!);
            last PARSE;
        }

        my $header = <$ins>;
        unless (defined ($header) && $header) {
            warn wrap_msg(dgettext("po4a", "Can't read Po4a header from %s."), $filename);
            last PARSE;
        }

        unless ($header =~ s/PO4A-HEADER://i) {
            warn wrap_msg(dgettext("po4a", "First line of %s does not look like a Po4a header."), $filename);
            last PARSE;
        }
        foreach my $part (split(/;/,$header)) {
            unless ($part =~ m/^\s*([^=]*)=(.*)$/) {
                warn wrap_msg(dgettext("po4a", "Syntax error in Po4a header of %s, near \"%s\""), $filename, $part);
                last PARSE;
            }
            my ($key,$value) = ($1,$2);
            $key = lc($key);
            if ($key eq 'mode') {
                $mode = lc($value);
            } elsif ($key eq 'position') {
                $position = $value;
            } elsif ($key eq 'endboundary') {
                $boundary = $value;
                $bmode = 'after';
            } elsif ($key eq 'beginboundary') {
                $boundary = $value;
                $bmode = 'before';
            } else {
                warn wrap_msg(dgettext("po4a", "Invalid argument in the Po4a header of %s: %s"), $filename, $key);
                last PARSE;
            }
        }

        unless (length($mode)) {
            warn wrap_msg(dgettext("po4a", "The Po4a header of %s does not define the mode."), $filename);
            last PARSE;
        }
        unless ($mode eq "before" || $mode eq "after") {
            warn wrap_msg(dgettext("po4a", "Mode invalid in the Po4a header of %s: should be 'before' or 'after' not %s."), $filename, $mode);
            last PARSE;
        }

        unless (length($position)) {
            warn wrap_msg(dgettext("po4a", "The Po4a header of %s does not define the position."), $filename);
            last PARSE;
        }
        unless ($mode eq "before" || length($boundary)) {
            warn wrap_msg(dgettext("po4a", "No ending boundary given in the Po4a header, but mode=after."));
            last PARSE;
        }

        while (defined(my $line = <$ins>)) {
            $content .= $line;
        }

        $errcode = 0;
    }
    close($ins) if $opened;

    return ($errcode,$mode,$position,$boundary,$bmode,$content);
}

# Returns a copy of the string without its trailing newline.
sub mychomp {
    my ($str) = shift;
    chomp($str);
    return $str;
}

# Applies the addendum $filename to doc_out.
#  - mode=before: the content is inserted before the single line matching
#    the position.
#  - mode=after: the lines following the position are scanned for the
#    boundary, and the content goes before or after the boundary line
#    (beginboundary / endboundary), or at the end of the document when the
#    boundary is never found.
# Returns 1 on success and 0 on error; dies when the file does not exist.
sub addendum {
    my ($self,$filename) = @_;

    print STDERR "Apply addendum $filename..." if $self->debug();
    unless ($filename) {
        warn wrap_msg(dgettext("po4a",
            "Can't apply addendum when not given the filename"));
        return 0;
    }
    die wrap_msg(dgettext("po4a", "Addendum %s does not exist."), $filename)
        unless -e $filename;

    my ($errcode,$mode,$position,$boundary,$bmode,$content)=
        addendum_parse($filename);
    return 0 if ($errcode);

    print STDERR "mode=$mode;pos=$position;bound=$boundary;bmode=$bmode;ctn=$content\n"
        if $self->debug();

    # We only recode the addendum if an origin charset is specified, else we
    # suppose it's already in the output document's charset
    if (defined($self->{TT}{'addendum_charset'}) &&
        length($self->{TT}{'addendum_charset'})) {
        Encode::from_to($content,$self->{TT}{'addendum_charset'},
                        $self->get_out_charset);
    }

    # The position has to designate exactly one line of the document.
    my $found = scalar grep { /$position/ } @{$self->{TT}{doc_out}};
    if ($found == 0) {
        warn wrap_msg(dgettext("po4a",
            "No candidate position for the addendum %s."), $filename);
        return 0;
    }
    if ($found > 1) {
        warn wrap_msg(dgettext("po4a",
            "More than one candidate position found for the addendum %s."), $filename);
        return 0;
    }

    my $chatty = ($self->verbose() > 1 || $self->debug());

    if ($mode eq "before") {
        if ($chatty) {
            foreach my $candidate (@{$self->{TT}{doc_out}}) {
                print STDERR wrap_msg(dgettext("po4a", "Addendum '%s' applied before this line: %s"), $filename, $candidate)
                    if ($candidate =~ /$position/);
            }
        }
        @{$self->{TT}{doc_out}} = map { /$position/ ? ($content,$_) : $_
                                  } @{$self->{TT}{doc_out}};
    } else {
        my @newres = ();

        do {
            # make sure it doesnt whine on empty document
            my $line = scalar @{$self->{TT}{doc_out}} ? shift @{$self->{TT}{doc_out}} : "";
            push @newres,$line;
            my $outline = mychomp($line);
            $outline =~ s/^[ \t]*//;

            if ($line =~ m/$position/) {
                # Copy the lines up to the boundary. The end of the document
                # is detected with defined(): a line such as "0" is false but
                # is still a line.
                while (defined($line = shift @{$self->{TT}{doc_out}})) {
                    last if ($line =~ /$boundary/);
                    push @newres,$line;
                }
                if (defined $line) {
                    if ($bmode eq 'before') {
                        print STDERR wrap_msg(dgettext("po4a",
                                                       "Addendum '%s' applied before this line: %s"),
                                              $filename, $outline)
                            if $chatty;
                        push @newres,$content;
                        push @newres,$line;
                    } else {
                        print STDERR wrap_msg(dgettext("po4a",
                                                       "Addendum '%s' applied after the line: %s."),
                                              $filename, $outline)
                            if $chatty;
                        push @newres,$line;
                        push @newres,$content;
                    }
                } else {
                    print STDERR wrap_msg(dgettext("po4a", "Addendum '%s' applied at the end of the file."), $filename)
                        if $chatty;
                    push @newres,$content;
                }
            }
        } while (scalar @{$self->{TT}{doc_out}});
        @{$self->{TT}{doc_out}} = @newres;
    }
    print STDERR "done.\n" if $self->debug();
    return 1;
}

=back

=head1 INTERNAL FUNCTIONS used to write derivated parsers

=head2 Getting input, providing output

Four functions are provided to get input and return output. They are very
similar to shift/unshift and push/pop. The first pair is about input, while
the second is about output. Mnemonic: in input, you are interested in the
first line, what shift gives, and in output you want to add your result at
the end, like push does.

=over 4

=item shiftline()

This function returns the next line of the doc_in to be parsed and its
reference (packed as an array).

=item unshiftline($$)

Unshifts a line of the input document and its reference.

=item pushline($)

Push a new line to the doc_out.

=item popline()

Pop the last pushed line from the doc_out.

=back

=cut

# Takes the next (line, reference) pair off the front of doc_in.
sub shiftline {
    my ($self) = @_;
    my $line = shift @{ $self->{TT}{doc_in} };
    my $ref  = shift @{ $self->{TT}{doc_in} };
    return ($line, $ref);
}

# Puts the given values (a line and its reference) back at the front of
# doc_in.
sub unshiftline {
    my ($self, @pair) = @_;
    unshift @{ $self->{TT}{doc_in} }, @pair;
}

# Appends a line to doc_out; an undefined line is ignored.
sub pushline {
    my ($self, $line) = @_;
    return !1 unless defined $line;
    return push @{ $self->{TT}{doc_out} }, $line;
}

# Removes the last line of doc_out and returns it.
sub popline {
    my ($self) = @_;
    return pop @{ $self->{TT}{doc_out} };
}

=head2 Marking strings as translatable

One function is provided to handle the text which should be translated.

=over 4

=item translate($$$)

Mandatory arguments:

=over 2

=item -

A string to translate

=item -

The reference of this string (ie, position in inputfile)

=item -

The type of this string (ie, the textual description of its structural role
; used in Locale::Po4a::Po::gettextization() ; see also L<po4a(7)|po4a.7>,
section I<Gettextization: how does it work?>)

=back

This function can also take some extra arguments. They must be organized as
a hash. For example:

  $self->translate("string","ref","type",
                   'wrap' => 1);

=over

=item wrap

boolean indicating whether we can consider that whitespaces in string are
not important. If yes, the function canonizes the string before looking for
a translation or extracting it, and wraps the translation.

=item wrapcol

The column at which we should wrap (default: 76).

=item comment

An extra comment to add to the entry.

=back

Actions:

=over 2

=item -

Pushes the string, reference and type to po_out.

=item -

Returns the translation of the string (as found in po_in) so that the
parser can build the doc_out.

=item -

Handles the charsets to recode the strings before sending them to
po_out and before returning the translations.

=back

=back

=cut

# Record $string (with its reference and type) in po_out and return its
# translation as found in po_in.
#
# Arguments: the string, its reference, its type, then an optional hash
# with the keys 'wrap', 'wrapcol' and 'comment'.
# Returns "" for an undefined or empty string.
# Dies when the input is not pure ascii and no input charset is known.
sub translate {
    my $self=shift;
    my ($string,$ref,$type)=(shift,shift,shift);
    my (%options)=@_;

    # my $validoption="wrap wrapcol";
    # my %validoption;

    return "" unless defined($string) && length($string);

    # map { $validoption{$_}=1 } (split(/ /,$validoption));
    # foreach (keys %options) {
    #     Carp::confess "internal error: translate() called with unknown arg $_. Valid options: $validoption"
    #         unless $validoption{$_};
    # }

    my $in_charset;
    if ($self->{TT}{ascii_input}) {
        $in_charset = "ascii";
    } else {
        if (defined($self->{TT}{'file_in_charset'}) and
            length($self->{TT}{'file_in_charset'}) and
            $self->{TT}{'file_in_charset'} !~ m/ascii/i) {
            $in_charset=$self->{TT}{'file_in_charset'};
        } else {
            # FYI, the document charset have to be determined *before* we see the first
            # string to recode.
            die wrap_mod("po4a", dgettext("po4a", "Couldn't determine the input document's charset. Please specify it on the command line. (non-ascii char at %s)"), $self->{TT}{non_ascii_ref})
        }
    }

    # Recode the msgid to the encoding of the input PO before the lookup.
    if ($self->{TT}{po_in}->get_charset ne "CHARSET") {
        $string = encode_from_to($string,
                                 $self->{TT}{'file_in_encoder'},
                                 $self->{TT}{po_in}{encoder});
    }

    if (defined $options{'wrapcol'} && $options{'wrapcol'} < 0) {
# FIXME: should be the parameter given with --width
        $options{'wrapcol'} = 76 + $options{'wrapcol'};
    }
    my $transstring = $self->{TT}{po_in}->gettext($string,
                                    'wrap'      => $options{'wrap'}||0,
                                    'wrapcol'   => $options{'wrapcol'});

    # Recode the translation to the encoding of the output document.
    if ($self->{TT}{po_in}->get_charset ne "CHARSET") {
        my $out_encoder = $self->{TT}{'file_out_encoder'};
        unless (defined $out_encoder) {
            $out_encoder = find_encoding($self->get_out_charset)
        }
        $transstring = encode_from_to($transstring,
                                      $self->{TT}{po_in}{encoder},
                                      $out_encoder);
    }

    # If the input document isn't completely in ascii, we should see what to
    # do with the current string
    unless ($self->{TT}{ascii_input}) {
        my $out_charset = $self->{TT}{po_out}->get_charset;
        # We set the output po charset
        if ($out_charset eq "CHARSET") {
            if ($self->{TT}{utf_mode}) {
                $out_charset="utf-8";
            } else {
                $out_charset=$in_charset;
            }
            $self->{TT}{po_out}->set_charset($out_charset);
        }
        # The charset name is quoted so that it is compared literally:
        # characters such as '+', '.' or '(' in a name must not be taken
        # as regex operators.
        if ( $in_charset !~ /^\Q$out_charset\E$/i ) {
            Encode::from_to($string,$in_charset,$out_charset);
            if (defined($options{'comment'}) and length($options{'comment'})) {
                Encode::from_to($options{'comment'},$in_charset,$out_charset);
            }
        }
    }

    # the comments provided by the modules are automatic comments from the PO point of view
    $self->{TT}{po_out}->push('msgid'     => $string,
                              'reference' => $ref,
                              'type'      => $type,
                              'automatic' => $options{'comment'},
                              'wrap'      => $options{'wrap'}||0,
                              'wrapcol'   => $options{'wrapcol'});

#    if ($self->{TT}{po_in}->get_charset ne "CHARSET") {
#        Encode::from_to($transstring,$self->{TT}{po_in}->get_charset,
#            $self->get_out_charset);
#    }

    if ($options{'wrap'}||0) {
        # Strip the spaces at the end of every line, but keep the ones
        # that end the whole string.
        $transstring =~ s/( *)$//s;
        my $trailing_spaces = $1||"";
        $transstring =~ s/ *$//gm;
        $transstring .= $trailing_spaces;
    }

    return $transstring;
}

=head2 Misc functions

=over 4

=item verbose()

Returns if the verbose option was passed during the creation of the
TransTractor.

=cut

# With an argument, stores it as the verbosity level; without one, returns
# the current level (0 when it was never set).
sub verbose {
    if (defined $_[1]) {
        $_[0]->{TT}{verbose} = $_[1];
    } else {
        return $_[0]->{TT}{verbose} || 0; # undef and 0 have the same meaning, but one generates warnings
    }
}

=item debug()

Returns if the debug option was passed during the creation of the
TransTractor.

=cut

# Returns the stored debug flag as is (may be undefined).
sub debug {
    return $_[0]->{TT}{debug};
}

=item detected_charset($)

This tells TransTractor that a new charset (the first argument) has been
detected from the input document. It can usually be read from the document
header. Only the first charset will remain, coming either from the
process() arguments or detected from the document.

=pod

=cut

# Remember the charset found in the input document, unless one is already
# known, and clear the ascii_input flag when the retained charset is not
# an ascii one.
sub detected_charset {
    my ($self, $charset) = @_;
    my $state = ($self->{TT} ||= {});

    my $already_known = defined($state->{'file_in_charset'})
                     && length($state->{'file_in_charset'});
    if (!$already_known) {
        $state->{'file_in_charset'} = $charset;
        $state->{'file_in_encoder'} = find_encoding($charset)
            if defined $charset;
    }

    my $retained = $state->{'file_in_charset'};
    if (defined $retained and length $retained and $retained !~ m/ascii/i) {
        $state->{ascii_input}=0;
    }
}

=item get_out_charset()

This function will return the charset that should be used in the output
document (usually useful to substitute the input document's detected charset
where it has been found).

It will use the output charset specified in the command line. If it wasn't
specified, it will use the input po's charset, and if the input po has the
default "CHARSET", it will return the input document's charset, so that no
encoding is performed.

=cut

# Pick the output charset: explicit request first, then utf-8 for ascii
# input in utf mode, then the input PO charset, then the input document
# charset, and finally "ascii".
sub get_out_charset {
    my ($self) = @_;

    # Use the value specified at the command line
    my $requested = $self->{TT}{'file_out_charset'};
    return $requested if defined($requested) and length($requested);

    return "utf-8" if $self->{TT}{utf_mode} && $self->{TT}{ascii_input};

    my $charset = $self->{TT}{po_in}->get_charset;
    return $charset unless $charset eq "CHARSET";

    my $input_charset = $self->{TT}{'file_in_charset'};
    if (defined($input_charset) and length($input_charset)) {
        $charset = $input_charset;
    }
    return $charset eq "CHARSET" ? "ascii" : $charset;
}

=item recode_skipped_text($)

This function returns the recoded text passed as argument, from the input
document's charset to the output document's one. This isn't needed when
translating a string (translate() recodes everything itself), but it is when
you skip a string from the input document and you want the output document to
be consistent with the global encoding.

=pod

=cut

# Recode text that was copied verbatim from the input document so that it
# matches the charset of the output document. Pure ascii input is returned
# untouched; an unknown input charset is fatal.
sub recode_skipped_text {
    my ($self, $text) = @_;

    return $text if $self->{TT}{'ascii_input'};

    my $in_charset = $self->{TT}{'file_in_charset'};
    unless (defined($in_charset) and length($in_charset)) {
        die wrap_mod("po4a", dgettext("po4a", "Couldn't determine the input document's charset. Please specify it on the command line. (non-ascii char at %s)"), $self->{TT}{non_ascii_ref})
    }

    $text = encode_from_to($text,
                           $self->{TT}{'file_in_encoder'},
                           find_encoding($self->get_out_charset));
    return $text;
}


# encode_from_to($,$,$)
#
# Encode the given text from one encoding to another one.
# Unlike Encode::from_to, the arguments are encoder objects (as returned by
# Encode::find_encoding(<name>)) rather than encoding names, which saves
# repeated calls to find_encoding.
#
# An undefined "from" encoder means the text is left undecoded (it is
# considered as UTF-8 or ascii).
# An undefined "to" encoder means the text is left unencoded (it is
# considered as UTF-8).
#
sub encode_from_to {
    my ($text, $from, $to) = @_;

    $text = $from->decode($text, 0) if defined $from;
    $text = $to->encode($text, 0)   if defined $to;

    return $text;
}

=back

=head1 FUTURE DIRECTIONS

One shortcoming of the current TransTractor is that it can't handle
translated document containing all languages, like debconf templates, or
.desktop files.

To address this problem, the only interface changes needed are:

=over 2

=item -

take a hash as po_in_name (a list per language)

=item -

add an argument to translate to indicate the target language

=item -

make a pushline_all function, which would make pushline of its content for
all language, using a map-like syntax:

    $self->pushline_all({ "Description[".$langcode."]=".
                          $self->translate($line,$ref,$langcode)
                        });

=back

Will see if it's enough ;)

=head1 AUTHORS

 Denis Barbier <barbier@linuxfr.org>
 Martin Quinson (mquinson#debian.org)
 Jordi Vilalta <jvprat@gmail.com>

=cut

1;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/po4a/lib/Locale/Po4a/Xml.pm Thu Mar 12 15:43:56 2009 +0800 @@ -0,0 +1,1973 @@ +#!/usr/bin/perl + +# Po4a::Xml.pm +# +# extract and translate translatable strings from XML documents. +# +# This code extracts plain text from tags and attributes from generic +# XML documents, and it can be used as a base to build modules for +# XML-based documents. +# +# Copyright (c) 2004 by Jordi Vilalta <jvprat@gmail.com> +# Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +######################################################################## + +=head1 NAME + +Locale::Po4a::Xml - Convert XML documents and derivates from/to PO files + +=head1 DESCRIPTION + +The po4a (po for anything) project goal is to ease translations (and more +interestingly, the maintenance of translations) using gettext tools on +areas where they were not expected like documentation. + +Locale::Po4a::Xml is a module to help the translation of XML documents into +other [human] languages. It can also be used as a base to build modules for +XML-based documents. 

=pod

=cut

package Locale::Po4a::Xml;

use 5.006;
use strict;
use warnings;

require Exporter;
use vars qw(@ISA @EXPORT);
@ISA = qw(Locale::Po4a::TransTractor);
@EXPORT = qw(new initialize @tag_types);

use Locale::Po4a::TransTractor;
use Locale::Po4a::Common;
use Carp qw(croak);
use File::Basename;
use File::Spec;

#It will maintain the path from the root tag to the current one
my @path;

#It will contain a list of external entities and their attached paths
my %entities;

my @comments;

# Return the next (line, reference) pair of the input document.
#
# When the line references one of the registered external entities
# (&name;), the entity file is read and its lines are spliced into the
# input stream in place of the reference, each with a "file:line"
# reference of its own.
# Croaks when an entity file cannot be read or closed.
sub shiftline {
    my $self = shift;
    # call Transtractor's shiftline
    my ($line,$ref) = $self->SUPER::shiftline();
    return ($line,$ref) if (not defined $line);

    for my $k (keys %entities) {
        # The entity name is matched literally: XML names may contain
        # characters such as '.' which are regex operators.
        if ($line =~ m/^(.*?)&\Q$k\E;(.*)$/s) {
            my ($before, $after) = ($1, $2);
            my $linenum=0;
            my @textentries;

            # Three-argument open: the path comes from the document and
            # must never be interpreted as an open mode or a command.
            open (my $in, '<', $entities{$k})
                or croak wrap_mod("po4a::xml",
                                  dgettext("po4a", "Can't read from %s: %s"),
                                  $entities{$k}, $!);
            while (defined (my $textline = <$in>)) {
                $linenum++;
                my $textref=$entities{$k}.":$linenum";
                push @textentries, ($textline,$textref);
            }
            close $in
                or croak wrap_mod("po4a::xml",
                          dgettext("po4a", "Can't close %s after reading: %s"),
                                  $entities{$k}, $!);

            # The text following the reference is queued after the
            # entity content; the text preceding it is glued to the
            # first queued entry.
            push @textentries, ($after, $ref);
            $line = $before.(shift @textentries);
            $ref .= " ".(shift @textentries);
            $self->unshiftline(@textentries);
        }
    }

    return ($line,$ref);
}

# Remember the file name for parse() and let TransTractor load the file.
sub read {
    my ($self,$filename)=@_;
    push @{$self->{DOCPOD}{infile}}, $filename;
    $self->Locale::Po4a::TransTractor::read($filename);
}

# Parse every file registered through read().
sub parse {
    my $self=shift;
    foreach my $filename (@{$self->{DOCPOD}{infile}}) {
        $self->parse_file($filename);
    }
}

# @save_holders is a stack of references to ('paragraph', 'translation',
# 'sub_translations', 'open', 'close', 'folded_attributes') hashes, where:
# paragraph is a reference to an array (see paragraph in the
#           treat_content() subroutine) of strings followed by
#           references. It contains the @paragraph array as it was
#           before the processing was interrupted by a tag introducing
#           a placeholder.
# translation is the translation of this level up to now
# sub_translations is a reference to an array of strings containing the
#                  translations which must replace the placeholders.
# open is the tag which opened the placeholder.
# close is the tag which closed the placeholder.
# folded_attributes is an hash of tags with their attributes (<tag attrs=...>
#                   strings), referenced by the folded tag id, which should
#                   replace the <tag po4a-id=id> strings in the current
#                   translation.
#
# If @save_holders only has 1 holder, then we are not processing the
# content of an holder, we are translating the document.
my @save_holders;


# If we are at the bottom of the stack and there is no <placeholder ...> in
# the current translation, we can push the translation in the translated
# document.
# Otherwise, we keep the translation in the current holder.
sub pushline {
    my ($self, $line) = (shift, shift);

    my $holder = $save_holders[$#save_holders];
    my $translation = $holder->{'translation'};
    $translation .= $line;

    # Restore the attributes that were folded into po4a-id=<id> markers.
    while (    %{$holder->{folded_attributes}}
           and $translation =~ m/^(.*)<([^>]+?)\s+po4a-id=([0-9]+)>(.*)$/s) {
        my $begin = $1;
        my $tag = $2;
        my $id = $3;
        my $end = $4;
        if (defined $holder->{folded_attributes}->{$id}) {
            # TODO: check if the tag is the same
            $translation = $begin.$holder->{folded_attributes}->{$id}.$end;
            delete $holder->{folded_attributes}->{$id};
        } else {
            # TODO: It will be hard to identify the location.
            #       => find a way to retrieve the reference.
            die wrap_mod("po4a::xml", dgettext("po4a", "'po4a-id=%d' in the translation does not exist in the original string (or 'po4a-id=%d' used twice in the translation)."), $id, $id);
        }
    }
# TODO: check that %folded_attributes is empty at some time
# => in translate_paragraph?

    if (   ($#save_holders > 0)
        or ($translation =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s)) {
        $holder->{'translation'} = $translation;
    } else {
        $self->SUPER::pushline($translation);
        $holder->{'translation'} = '';
    }
}

=head1 TRANSLATING WITH PO4A::XML

This module can be used directly to handle generic XML documents. This will
extract all tag's content, and no attributes, since it's where the text is
written in most XML based documents.

There are some options (described in the next section) that can customize
this behavior. If this doesn't fit to your document format you're encouraged
to write your own module derived from this, to describe your format's details.
See the section "Writing derivate modules" below, for the process description.

=cut

#
# Parse file and translate it
#
# Alternates between the content up to the next breaking tag and that tag
# itself until the end of the input is reported.
sub parse_file {
    my ($self,$filename) = @_;
    my $eof = 0;

    while (!$eof) {
        # We get all the text until the next breaking tag (not
        # inline) and translate it
        $eof = $self->treat_content;
        if (!$eof) {
            # And then we treat the following breaking tag
            $eof = $self->treat_tag;
        }
    }
}

=head1 OPTIONS ACCEPTED BY THIS MODULE

The global debug option causes this module to show the excluded strings, in
order to see if it skips something important.

These are this module's particular options:

=over 4

=item B<nostrip>

Prevents it to strip the spaces around the extracted strings.

=item B<wrap>

Canonizes the string to translate, considering that whitespaces are not
important, and wraps the translated document. This option can be overridden
by custom tag options. See the "tags" option below.

=item B<caseinsensitive>

It makes the tags and attributes searching to work in a case insensitive
way. If it's defined, it will treat E<lt>BooKE<gt>laNG and E<lt>BOOKE<gt>Lang as E<lt>bookE<gt>lang.
+ +=item B<includeexternal> + +When defined, external entities are included in the generated (translated) +document, and for the extraction of strings. If it's not defined, you +will have to translate external entities separately as independent +documents. + +=item B<ontagerror> + +This option defines the behavior of the module when it encounter a invalid +Xml syntax (a closing tag which does not match the last opening tag, or a +tag's attribute without value). +It can take the following values: + +=over + +=item I<fail> + +This is the default value. +The module will exit with an error. + +=item I<warn> + +The module will continue, and will issue a warning. + +=item I<silent> + +The module will continue without any warnings. + +=back + +Be careful when using this option. +It is generally recommended to fix the input file. + +=item B<tagsonly> + +Extracts only the specified tags in the "tags" option. Otherwise, it +will extract all the tags except the ones specified. + +Note: This option is deprecated. + +=item B<doctype> + +String that will try to match with the first line of the document's doctype +(if defined). If it doesn't, a warning will indicate that the document +might be of a bad type. + +=item B<tags> + +Space-separated list of tags you want to translate or skip. By default, +the specified tags will be excluded, but if you use the "tagsonly" option, +the specified tags will be the only ones included. The tags must be in the +form E<lt>aaaE<gt>, but you can join some (E<lt>bbbE<gt>E<lt>aaaE<gt>) to say that the content of +the tag E<lt>aaaE<gt> will only be translated when it's into a E<lt>bbbE<gt> tag. + +You can also specify some tag options putting some characters in front of +the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap) +to override the default behavior specified by the global "wrap" option. + +Example: WE<lt>chapterE<gt>E<lt>titleE<gt> + +Note: This option is deprecated. 
+You should use the B<translated> and B<untranslated> options instead. + +=item B<attributes> + +Space-separated list of tag's attributes you want to translate. You can +specify the attributes by their name (for example, "lang"), but you can +prefix it with a tag hierarchy, to specify that this attribute will only be +translated when it's into the specified tag. For example: E<lt>bbbE<gt>E<lt>aaaE<gt>lang +specifies that the lang attribute will only be translated if it's into an +E<lt>aaaE<gt> tag, and it's into a E<lt>bbbE<gt> tag. + +=item B<foldattributes> + +Do not translate attributes in inline tags. +Instead, replace all attributes of a tag by po4a-id=<id>. + +This is useful when attributes shall not be translated, as this simplifies the +strings for translators, and avoids typos. + +=item B<break> + +Space-separated list of tags which should break the sequence. +By default, all tags break the sequence. + +The tags must be in the form <aaa>, but you can join some +(<bbb><aaa>), if a tag (<aaa>) should only be considered +when it's into another tag (<bbb>). + +=item B<inline> + +Space-separated list of tags which should be treated as inline. +By default, all tags break the sequence. + +The tags must be in the form <aaa>, but you can join some +(<bbb><aaa>), if a tag (<aaa>) should only be considered +when it's into another tag (<bbb>). + +=item B<placeholder> + +Space-separated list of tags which should be treated as placeholders. +Placeholders do not break the sequence, but the content of placeholders is +translated separately. + +The location of the placeholder in its blocks will be marked with a string +similar to: + + <placeholder type=\"footnote\" id=\"0\"/> + +The tags must be in the form <aaa>, but you can join some +(<bbb><aaa>), if a tag (<aaa>) should only be considered +when it's into another tag (<bbb>). + +=item B<nodefault> + +Space separated list of tags that the module should not try to set by +default in any category. 
+ +=item B<cpp> + +Support C preprocessor directives. +When this option is set, po4a will consider preprocessor directives as +paragraph separators. +This is important if the XML file must be preprocessed because otherwise +the directives may be inserted in the middle of lines if po4a consider it +belong to the current paragraph, and they won't be recognized by the +preprocessor. +Note: the preprocessor directives must only appear between tags +(they must not break a tag). + +=item B<translated> + +Space-separated list of tags you want to translate. + +The tags must be in the form <aaa>, but you can join some +(<bbb><aaa>), if a tag (<aaa>) should only be considered +when it's into another tag (<bbb>). + +You can also specify some tag options putting some characters in front of +the tag hierarchy. For example, you can put 'w' (wrap) or 'W' (don't wrap) +to overide the default behavior specified by the global "wrap" option. + +Example: WE<lt>chapterE<gt>E<lt>titleE<gt> + +=item B<untranslated> + +Space-separated list of tags you do not want to translate. + +The tags must be in the form <aaa>, but you can join some +(<bbb><aaa>), if a tag (<aaa>) should only be considered +when it's into another tag (<bbb>). + +=item B<defaulttranslateoption> + +The default categories for tags that are not in any of the translated, +untranslated, break, inline, or placeholder. + +This is a set of letters: + +=over + +=item I<w> + +Tags should be translated and content can be re-wrapped. + +=item I<W> + +Tags should be translated and content should not be re-wrapped. + +=item I<i> + +Tags should be translated inline. + +=item I<p> + +Tags should be translated as placeholders. + +=back + +=back + +=cut +# TODO: defaulttranslateoption +# w => indicate that it is only valid for translatable tags and do not +# care about inline/break/placeholder? +# ... 

# Reset the parser state, install the default value of every option,
# apply the options given by the caller and finally let treat_options
# digest them. An option that is set but not known is fatal.
sub initialize {
    my ($self, %options) = @_;

    # Reset the path
    @path = ();

    # Initialize the stack of holders with a single, empty holder
    @save_holders = ({
        'paragraph'         => [],
        'translation'       => "",
        'sub_translations'  => [],
        'folded_attributes' => {},
    });

    # Option defaults. Keys already present in the options hash (set by a
    # derived module before calling this function) are left alone unless
    # they are listed here.
    my %defaults = (
        'nostrip'                => 0,
        'wrap'                   => 0,
        'caseinsensitive'        => 0,
        'tagsonly'               => 0,
        'tags'                   => '',
        'break'                  => '',
        'translated'             => '',
        'untranslated'           => '',
        'defaulttranslateoption' => '',
        'attributes'             => '',
        'foldattributes'         => 0,
        'inline'                 => '',
        'placeholder'            => '',
        'doctype'                => '',
        'nodefault'              => '',
        'includeexternal'        => 0,
        'ontagerror'             => "fail",
        'cpp'                    => 0,
        'verbose'                => '',
        'debug'                  => '',
    );
    foreach my $name (keys %defaults) {
        $self->{options}{$name} = $defaults{$name};
    }

    # Only options carrying a true value are considered.
    foreach my $opt (keys %options) {
        next unless $options{$opt};
        die wrap_mod("po4a::xml",
                     dgettext("po4a", "Unknown option: %s"), $opt)
            unless exists $self->{options}{$opt};
        $self->{options}{$opt} = $options{$opt};
    }

    # Default options set by modules. Forbidden for users.
    foreach my $category (qw(translated untranslated break inline
                             placeholder attributes)) {
        $self->{options}{"_default_$category"} = '';
    }

    # Lists maintained by the module, all reset to undef:
    #  tags, translated, untranslated: the translatable tags
    #  attributes:  the translatable attributes
    #  break:       the breaking tags
    #  inline:      the inline tags
    #  placeholder: the placeholder tags
    #  nodefault:   the tags that must not be set in the tags or inline
    #               category by this module or sub-module (unless
    #               specified in an option)
    foreach my $list (qw(tags translated untranslated attributes
                         break inline placeholder nodefault)) {
        $self->{$list} = undef;
    }

    $self->treat_options;
}

=head1 WRITING DERIVATE MODULES

=head2 DEFINE WHAT TAGS AND ATTRIBUTES TO TRANSLATE

The simplest customization is to define which tags and attributes you want
the parser to translate. This should be done in the initialize function.
First you should call the main initialize, to get the command-line options,
and then, append your custom definitions to the options hash. If you want
to treat some new options from command line, you should define them before
calling the main initialize:

  $self->{options}{'new_option'}='';
  $self->SUPER::initialize(%options);
  $self->{options}{'_default_translated'}.=' <p> <head><title>';
  $self->{options}{'attributes'}.=' <p>lang id';
  $self->{options}{'_default_inline'}.=' <br>';
  $self->treat_options;

You should use the B<_default_inline>, B<_default_break>,
B<_default_placeholder>, B<_default_translated>, B<_default_untranslated>,
and B<_default_attributes> options in derivated modules. This allows users
to override the default behavior defined in your module with command line
options.
+ +=head2 OVERRIDING THE found_string FUNCTION + +Another simple step is to override the function "found_string", which +receives the extracted strings from the parser, in order to translate them. +There you can control which strings you want to translate, and perform +transformations to them before or after the translation itself. + +It receives the extracted text, the reference on where it was, and a hash +that contains extra information to control what strings to translate, how +to translate them and to generate the comment. + +The content of these options depends on the kind of string it is (specified in an +entry of this hash): + +=over + +=item type="tag" + +The found string is the content of a translatable tag. The entry "tag_options" +contains the option characters in front of the tag hierarchy in the module +"tags" option. + +=item type="attribute" + +Means that the found string is the value of a translatable attribute. The +entry "attribute" has the name of the attribute. + +=back + +It must return the text that will replace the original in the translated +document. Here's a basic example of this function: + + sub found_string { + my ($self,$text,$ref,$options)=@_; + $text = $self->translate($text,$ref,"type ".$options->{'type'}, + 'wrap'=>$self->{options}{'wrap'}); + return $text; + } + +There's another simple example in the new Dia module, which only filters +some strings. 

=pod

=cut

# Translate one extracted string.
#
# $options->{'type'} selects how the entry is described: "tag" (the
# 'tag_options' letters w/W may override the global wrap setting),
# "attribute" (the name is taken from 'attribute') or "CDATA" (never
# wrapped). Any other type is an internal error.
# Strings made only of whitespace are returned untouched.
sub found_string {
    my ($self, $text, $ref, $options) = @_;

    return $text if $text =~ m/^\s*$/s;

    my $wrap = $self->{options}{'wrap'};
    my $kind = $options->{'type'};
    my $comment;

    if ($kind eq "tag") {
        $comment = "Content of: ".$self->get_path;

        # 'W' is tested last, so it wins when both letters are present
        $wrap = 1 if $options->{'tag_options'} =~ /w/;
        $wrap = 0 if $options->{'tag_options'} =~ /W/;
    } elsif ($kind eq "attribute") {
        $comment = "Attribute '".$options->{'attribute'}."' of: ".$self->get_path;
    } elsif ($kind eq "CDATA") {
        $comment = "CDATA";
        $wrap = 0;
    } else {
        die wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Internal error: unknown type identifier '%s'."), $options->{'type'});
    }

    my $translated = $self->translate($text, $ref, $comment,
                                      'wrap'  => $wrap,
                                      comment => $options->{'comments'});
    return $translated;
}

=head2 MODIFYING TAG TYPES (TODO)

This is a more complex one, but it enables a (almost) total customization.
It's based in a list of hashes, each one defining a tag type's behavior. The
list should be sorted so that the most general tags are after the most
concrete ones (sorted first by the beginning and then by the end keys). To
define a tag type you'll have to make a hash with the following keys:

=over 4

=item beginning

Specifies the beginning of the tag, after the "E<lt>".

=item end

Specifies the end of the tag, before the "E<gt>".

=item breaking

It says if this is a breaking tag class. A non-breaking (inline) tag is one
that can be taken as part of the content of another tag. It can take the
values false (0), true (1) or undefined. If you leave this undefined, you'll
have to define the f_breaking function that will say whether a concrete tag of
this class is a breaking tag or not.

=item f_breaking

It's a function that will tell if the next tag is a breaking one or not. It
should be defined if the "breaking" option is not.

=item f_extract

If you leave this key undefined, the generic extraction function will have to
extract the tag itself. It's useful for tags that can have other tags or
special structures in them, so that the main parser doesn't get mad. This
function receives a boolean that says if the tag should be removed from the
input stream or not.

=item f_translate

This function receives the tag (in the get_string_until() format) and returns
the translated tag (translated attributes or all needed transformations) as a
single string.

=back

=cut

##### Generic XML tag types #####'

# Ordered from the most specific to the most general tag type.
our @tag_types = (
    {   beginning   => "!--#",
        end         => "--",
        breaking    => 0,
        f_extract   => \&tag_extract_comment,
        f_translate => \&tag_trans_comment},
    {   beginning   => "!--",
        end         => "--",
        breaking    => 0,
        f_extract   => \&tag_extract_comment,
        f_translate => \&tag_trans_comment},
    {   beginning   => "?xml",
        end         => "?",
        breaking    => 1,
        f_translate => \&tag_trans_xmlhead},
    {   beginning   => "?",
        end         => "?",
        breaking    => 1,
        f_translate => \&tag_trans_procins},
    {   beginning   => "!DOCTYPE",
        end         => "",
        breaking    => 1,
        f_extract   => \&tag_extract_doctype,
        f_translate => \&tag_trans_doctype},
    {   beginning   => "![CDATA[",
        end         => "",
        breaking    => 1,
        f_extract   => \&CDATA_extract,
        f_translate => \&CDATA_trans},
    {   beginning   => "/",
        end         => "",
        f_breaking  => \&tag_break_close,
        f_translate => \&tag_trans_close},
    {   beginning   => "",
        end         => "/",
        f_breaking  => \&tag_break_alone,
        f_translate => \&tag_trans_alone},
    {   beginning   => "",
        end         => "",
        f_breaking  => \&tag_break_open,
        f_translate => \&tag_trans_open}
);

# Extract a comment: everything up to and including the closing '-->'.
sub tag_extract_comment {
    my ($self,$remove)=(shift,shift);
    my ($eof,@tag)=$self->get_string_until('-->',{include=>1,remove=>$remove});
    return ($eof,@tag);
}

# Comments are copied as is.
sub tag_trans_comment {
    my ($self,@tag)=@_;
    return $self->join_lines(@tag);
}

# Rewrite the XML declaration so that it announces the output charset.
# The charset found in the declaration (undef when there is none) is
# reported through detected_charset().
sub tag_trans_xmlhead {
    my ($self,@tag)=@_;

    # We don't have to translate anything from here: throw away references
    my $tag = $self->join_lines(@tag);

    # Only trust the capture when the match succeeded: after a failed
    # match $3 would still hold the value left by an earlier match.
    my $in_charset;
    if ($tag =~ /encoding=(("|')|)(.*?)(\s|\2)/s) {
        $in_charset = $3;
    }
    $self->detected_charset($in_charset);
    my $out_charset=$self->get_out_charset;

    if (defined $in_charset) {
        # Literal replacement; an empty name is skipped because an empty
        # pattern would make s/// reuse the last successful pattern.
        $tag =~ s/\Q$in_charset\E/$out_charset/ if length $in_charset;
    } else {
        if ($tag =~ m/standalone/) {
            $tag =~ s/(standalone)/encoding="$out_charset" $1/;
        } else {
            $tag.= " encoding=\"$out_charset\"";
        }
    }

    return $tag;
}

# Processing instructions are copied as is.
sub tag_trans_procins {
    my ($self,@tag)=@_;
    return $self->join_lines(@tag);
}

# Extract a DOCTYPE declaration, including its internal subset (the part
# between square brackets) when there is one.
sub tag_extract_doctype {
    my ($self,$remove)=(shift,shift);

    # Check if there is an internal subset (between []).
    my ($eof,@tag)=$self->get_string_until('>',{include=>1,unquoted=>1});

    # @tag alternates strings and references: keep the strings only.
    my $paragraph = "";
    for (my $i = 0; $i < @tag; $i += 2) {
        $paragraph .= $tag[$i];
    }
    my $found = ($paragraph =~ m/<.*\[.*</s) ? 1 : 0;

    if (not $found) {
        ($eof,@tag)=$self->get_string_until('>',{include=>1,remove=>$remove,unquoted=>1});
    } else {
        ($eof,@tag)=$self->get_string_until(']\s*>',{include=>1,remove=>$remove,unquoted=>1,regex=>1});
    }
    return ($eof,@tag);
}

# Check the document type against the 'doctype' option, register the
# external entities (when 'includeexternal' is set) and translate the
# value of the internal general entities.
sub tag_trans_doctype {
# This check is not really reliable.  There are system and public
# identifiers.  Only the public one could be checked reliably.
    my ($self,@tag)=@_;
    if (defined $self->{options}{'doctype'} ) {
        my $doctype = $self->{options}{'doctype'};
        if ( $tag[0] !~ /\Q$doctype\E/i ) {
            warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Bad document type. '%s' expected. You can fix this warning with a -o doctype option, or ignore this check with -o doctype=\"\"."), $doctype);
        }
    }
    my $i = 0;
    # External entities are resolved relative to the directory of the
    # file named in the reference of the tag.
    my $basedir = $tag[1];
    $basedir =~ s/:[0-9]+$//;
    $basedir = dirname($basedir);

    while ( $i < $#tag ) {
        my $t = $tag[$i];
        my $ref = $tag[$i+1];
        if ( $t =~ /^(\s*<!ENTITY\s+)(.*)$/is ) {
            my $part1 = $1;
            my $part2 = $2;
            my $includenow = 0;
            my $file = 0;
            my $name = "";
            if ($part2 =~ /^(%\s+)(.*)$/s ) {
                $part1.= $1;
                $part2 = $2;
                $includenow = 1;
            }
            # Split off the entity name. The captures are only used when
            # the match succeeded; a malformed declaration is left as is.
            if ($part2 =~ /^(\S+)(\s+)(.*)$/s) {
                $name = $1;
                $part1.= $1.$2;
                $part2 = $3;
            }
            if ( $part2 =~ /^(SYSTEM\s+)(.*)$/is ) {
                $part1.= $1;
                $part2 = $2;
                $file = 1;
                if ($self->{options}{'includeexternal'}) {
                    my $system_id = $part2;
                    # The system literal may be delimited by double or
                    # by single quotes; fall back to the historical
                    # double-quote-only stripping otherwise.
                    unless ($system_id =~ s/^(["'])(.*?)\1.*$/$2/s) {
                        $system_id =~ s/^"?(.*?)".*$/$1/s;
                    }
                    $entities{$name} = File::Spec->catfile($basedir, $system_id);
                }
            }
            if ((not $file) and (not $includenow)) {
                if ($part2 =~ m/^\s*(["'])(.*)\1(\s*>.*)$/s) {
                    my $comment = "Content of the $name entity";
                    my $quote = $1;
                    my $text = $2;
                    $part2 = $3;
                    $text = $self->translate($text,
                                             $ref,
                                             $comment,
                                             'wrap'=>1);
                    $t = $part1."$quote$text$quote$part2";
                }
            }
#            print $part1."\n";
#            print $name."\n";
#            print $part2."\n";
        }
        $tag[$i] = $t;
        $i += 2;
    }
    return $self->join_lines(@tag);
}

# A closing tag breaks the sequence unless the translate options of the
# current path contain 'i' or 'p'.
sub tag_break_close {
    my ($self,@tag)=@_;
    my $struct = $self->get_path;
    my $options = $self->get_translate_options($struct);
    if ($options =~ m/[ip]/) {
        return 0;
    } else {
        return 1;
    }
}

# Pop the current tag from the path and check that it is the one being
# closed; the 'ontagerror' option decides what a mismatch does.
sub tag_trans_close {
    my ($self,@tag)=@_;
    my $name = $self->get_tag_name(@tag);

    my $test = pop @path;
    if (!defined($test) || $test ne $name ) {
        my $ontagerror = $self->{options}{'ontagerror'};
        if ($ontagerror eq "warn") {
            warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong. Continuing..."), $name);
        } elsif ($ontagerror ne "silent") {
            die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name);
        }
    }
    return $self->join_lines(@tag);
}

# Extract a CDATA section: everything up to and including ']]>'.
sub CDATA_extract {
    my ($self,$remove)=(shift,shift);
    my ($eof, @tag) = $self->get_string_until(']]>',{include=>1,unquoted=>0,remove=>$remove});

    return ($eof, @tag);
}

# The whole CDATA section is handed to found_string with the "CDATA" type.
sub CDATA_trans {
    my ($self,@tag)=@_;
    return $self->found_string($self->join_lines(@tag),
                               $tag[1],
                               {'type' => "CDATA"});
}

# A stand-alone tag breaks the sequence unless its translate options
# contain 'i'.
sub tag_break_alone {
    my ($self,@tag)=@_;
    my $struct = $self->get_path($self->get_tag_name(@tag));
    if ($self->get_translate_options($struct) =~ m/i/) {
        return 0;
    } else {
        return 1;
    }
}

# Treat the attributes of a stand-alone tag; the tag is on the path only
# while its attributes are being processed.
sub tag_trans_alone {
    my ($self,@tag)=@_;
    my $name = $self->get_tag_name(@tag);
    push @path, $name;

    $name = $self->treat_attributes(@tag);

    pop @path;
    return $name;
}

# An opening tag breaks the sequence unless its translate options contain
# 'i' or 'p'.
sub tag_break_open {
    my ($self,@tag)=@_;
    my $struct = $self->get_path($self->get_tag_name(@tag));
    my $options = $self->get_translate_options($struct);
    if ($options =~ m/[ip]/) {
        return 0;
    } else {
        return 1;
    }
}

# Push the opened tag on the path and treat its attributes.
sub tag_trans_open {
    my ($self,@tag)=@_;
    my $name = $self->get_tag_name(@tag);
    push @path, $name;

    $name = $self->treat_attributes(@tag);

    return $name;
}

##### END of Generic XML tag types #####

=head1 INTERNAL FUNCTIONS used to write derivated parsers

=head2 WORKING WITH TAGS

=over 4

=item get_path()

This function returns the path to the current tag from the document's root,
in the form E<lt>htmlE<gt>E<lt>bodyE<gt>E<lt>pE<gt>.

An additional array of tags (without brackets) can be passed in argument.
These path elements are added to the end of the current path.
=cut

sub get_path {
    my ($self, @extra) = @_;

    # Neither an open tag nor an extra element: there is no path to build.
    return "outside any tag (error?)" unless (@path > 0 or @extra > 0);

    return "<".join("><", @path, @extra).">";
}

=item tag_type()

This function returns the index from the tag_types list that fits to the next
tag in the input stream, or -1 if it's at the end of the input file.

=cut

sub tag_type {
    my $self = shift;

    # Peek at the next line; an undefined line means the input is exhausted.
    my ($first_line, $ref) = $self->shiftline();
    return -1 unless defined($first_line);
    $self->unshiftline($first_line, $ref);

    # Read (without consuming) up to the first unquoted '>' so that the
    # end mark of the candidate tag types can be checked as well.
    my (undef, @pieces) = $self->get_string_until(">",{include=>1,unquoted=>1});
    my $whole_tag = $self->join_lines(@pieces);

    foreach my $index (0 .. $#tag_types) {
        my $begin_mark = $tag_types[$index]->{beginning};
        my $end_mark   = $tag_types[$index]->{end};

        next unless $first_line =~ /^<\Q$begin_mark\E/;

        # A type with its own extractor is accepted on the beginning alone;
        # the others must also end with their end mark.
        return $index if defined($tag_types[$index]->{f_extract});
        return $index if defined($whole_tag) and $whole_tag =~ /\Q$end_mark\E>$/;
    }

    #It should never enter here, unless you undefine the most
    #general tags (as <...>)
    die "po4a::xml: Unknown tag type: ".$first_line."\n";
}

=item extract_tag($$)

This function returns the next tag from the input stream without the beginning
and end, in an array form, to maintain the references from the input file. It
has two parameters: the type of the tag (as returned by tag_type) and a
boolean, that indicates if it should be removed from the input stream.
=cut

sub extract_tag {
    my ($self, $type, $remove) = @_;
    my $begin_mark = $tag_types[$type]->{beginning};
    my $end_mark   = $tag_types[$type]->{end};

    # Use the tag type's own extractor when it has one, otherwise read up
    # to the first unquoted end mark.
    my $extractor = $tag_types[$type]->{f_extract};
    my ($eof, @tag) = defined($extractor)
        ? $extractor->($self, $remove)
        : $self->get_string_until($end_mark.">",{include=>1,remove=>$remove,unquoted=>1});

    # Strip the opening "<mark" from the first piece ...
    $tag[0] =~ /^<\Q$begin_mark\E(.*)$/s;
    $tag[0] = $1;
    # ... and the closing "mark>" from the last piece (the very last array
    # element is that piece's reference).
    my $last_piece = $#tag - 1;
    $tag[$last_piece] =~ /^(.*)\Q$end_mark\E>$/s;
    $tag[$last_piece] = $1;

    return ($eof, @tag);
}

=item get_tag_name(@)

This function returns the name of the tag passed as an argument, in the array
form returned by extract_tag.

=cut

sub get_tag_name {
    my ($self, @tag) = @_;
    # The name is the leading run of non-blank characters (possibly empty).
    my ($name) = $tag[0] =~ /^(\S*)/;
    return $name;
}

=item breaking_tag()

This function returns a boolean that says if the next tag in the input stream
is a breaking tag or not (inline tag). It leaves the input stream intact.

=cut

sub breaking_tag {
    my $self = shift;

    my $type = $self->tag_type;
    return 0 if $type == -1;

    # Most tag types state statically whether they break the paragraph.
    my $break = $tag_types[$type]->{breaking};
    return $break if defined($break);

    # This tag's breaking depends on its content: peek at the tag (it is
    # not removed from the input) and ask the type's own predicate.
    my ($eof, @lines) = $self->extract_tag($type,0);
    my $predicate = $tag_types[$type]->{f_breaking};
    $break = $predicate->($self, @lines);
    return $break;
}

=item treat_tag()

This function translates the next tag from the input stream. Using each
tag type's custom translation functions.

=cut

sub treat_tag {
    my $self = shift;
    my $type = $self->tag_type;

    my $begin_mark = $tag_types[$type]->{beginning};
    my $end_mark   = $tag_types[$type]->{end};
    my ($eof, @lines) = $self->extract_tag($type,1);

    # Set aside the blanks surrounding the tag content so that they are
    # written back untouched around the translated content.
    my ($space1, $head) = $lines[0] =~ /^(\s*)(.*)$/s;
    $lines[0] = $head;
    my ($tail, $space2) = $lines[$#lines-1] =~ /^(.*?)(\s*)$/s;
    $lines[$#lines-1] = $tail;

    # Calling this tag type's specific handling (translation of
    # attributes...)
    my $translator = $tag_types[$type]->{f_translate};
    my $line = $translator->($self, @lines);
    $self->pushline("<".$begin_mark.$space1.$line.$space2.$end_mark.">");
    return $eof;
}

=item tag_in_list($@)

This function returns a string value that says if the first argument (a tag
hierarchy) matches any of the tags from the second argument (a list of tags
or tag hierarchies). If it doesn't match, it returns 0. Else, it returns the
matched tag's options (the characters in front of the tag) or 1 (if that tag
doesn't have options).

=back

=cut
sub tag_in_list ($$$) {
    my ($self, $path, $list) = @_;
    $path = lc $path if $self->{options}{'caseinsensitive'};

    # Try the full path first, then drop the outermost tag and retry,
    # until a match is found or nothing is left to drop.
    for (;;) {
        if (defined $list->{$path}) {
            return length($list->{$path}) ? $list->{$path} : 1;
        }
        return 0 unless ($path =~ m/</);
        $path =~ s/^<.*?>//;
    }
}

=head2 WORKING WITH ATTRIBUTES

=over 4

=item treat_attributes(@)

This function handles the translation of the tags' attributes. It receives the tag
without the beginning / end marks, and then it finds the attributes, and it
translates the translatable ones (specified by the module option "attributes").
This returns a plain string with the translated tag.

=back

=cut

# Translate the attributes of one tag.
#
# @tag is the tag in (text, reference, text, reference, ...) form, without
# its beginning and end marks. The tag name and all the blanks are copied
# through unchanged. Each attribute value goes through found_string() when
# the attribute (prefixed with the current path) matches $self->{attributes},
# and through recode_skipped_text() otherwise.
#
# Returns the rebuilt tag as a plain string.
#
# A malformed attribute (no name before '=', no '=', no value, or no closing
# quote) is reported according to the 'ontagerror' option: "warn" warns and
# goes on, "silent" goes on quietly, any other value dies.
sub treat_attributes {
    my ($self,@tag)=@_;

    $tag[0] =~ /^(\S*)(.*)/s;
    my $text = $1;
    $tag[0] = $2;

    while (@tag) {
        $text .= $self->skip_spaces(\@tag);
        last unless @tag;

        # From here on something is left, so an attribute is expected.
        my $complete = 0;
        my $ref = $tag[1];

        if ($tag[0] =~ /^([^\s=]+)(.*)/s) {
            # Get the attribute's name
            my $name = $1;
            $tag[0] = $2;
            $text .= $name;
            $text .= $self->skip_spaces(\@tag);

            # Get the '='
            if (@tag and $tag[0] =~ /^=(.*)/s) {
                $tag[0] = $1;
                $text .= "=";
                $text .= $self->skip_spaces(\@tag);
                if (@tag) {
                    # Get the value
                    my $value = "";
                    my $terminated = 1;
                    $ref = $tag[1];
                    my $quot = substr($tag[0],0,1);
                    if ($quot ne "\"" and $quot ne "'") {
                        # Unquoted value
                        $quot = "";
                        $tag[0] =~ /^(\S+)(.*)/s;
                        $value = $1;
                        $tag[0] = $2;
                    } else {
                        # Quoted value
                        $text .= $quot;
                        $tag[0] =~ /^\Q$quot\E(.*)/s;
                        $tag[0] = $1;
                        # The value may span several pieces. Stop when the
                        # pieces run out: without the @tag test a missing
                        # closing quote would loop forever on undef.
                        while (@tag and $tag[0] !~ /\Q$quot\E/) {
                            $value .= $tag[0];
                            shift @tag;
                            shift @tag;
                        }
                        if (@tag) {
                            $tag[0] =~ /^(.*?)\Q$quot\E(.*)/s;
                            $value .= $1;
                            $tag[0] = $2;
                        } else {
                            $terminated = 0;
                        }
                    }

                    if ($terminated) {
                        $complete = 1;
                        if ($self->tag_in_list($self->get_path.$name,$self->{attributes})) {
                            $text .= $self->found_string($value, $ref, { type=>"attribute", attribute=>$name });
                        } else {
                            print wrap_ref_mod($ref, "po4a::xml", dgettext("po4a", "Content of attribute %s excluded: %s"), $self->get_path.$name, $value)
                                if $self->debug();
                            $text .= $self->recode_skipped_text($value);
                        }
                        $text .= $quot;
                    } else {
                        # Unterminated value: keep what was read, untranslated
                        # and without a closing quote, and report it below.
                        $text .= $self->recode_skipped_text($value);
                    }
                }
            }
        } else {
            # After skip_spaces() the piece is not empty and does not start
            # with a blank, so a failed name match means it starts with '='.
            # Copy the stray '=' through so that the loop always makes
            # progress instead of reusing stale capture variables.
            $tag[0] =~ /^(=+)(.*)/s;
            $text .= $1;
            $tag[0] = $2;
        }

        unless ($complete) {
            my $ontagerror = $self->{options}{'ontagerror'};
            if ($ontagerror eq "warn") {
                warn wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax.  Continuing..."));
            } elsif ($ontagerror ne "silent") {
                die wrap_ref_mod($ref, "po4a::xml", dgettext ("po4a", "Bad attribute syntax"));
            }
        }
    }
    return $text;
}

# Returns an empty string if the content in the $path should not be
# translated.
#
# Otherwise, returns the set of options for translation:
#   w: the content shall be re-wrapped
#   W: the content shall not be re-wrapped
#   i: the tag shall be inlined
#   p: a placeholder shall replace the tag (and its content)
#   f: appended to an inline tag when the 'foldattributes' option is set
#
# A translatable inline tag in an untranslated tag is treated as a translatable breaking tag.
#
# Results are memoized in %translate_options_cache, which is keyed by the
# path only and lives at file level (it is not stored in $self).
my %translate_options_cache;
sub get_translate_options {
    my $self = shift;
    my $path = shift;

    if (defined $translate_options_cache{$path}) {
        return $translate_options_cache{$path};
    }

    my $options = "";
    my $translate = 0;
    # Cleared as soon as any of the lists below says something about $path;
    # when it stays set, 'defaulttranslateoption' is used.
    my $usedefault = 1;

    # Deprecated tags/tagsonly pair: the path is translated when its
    # presence in the 'tags' list (0 or 1) equals the 'tagsonly' option.
    my $inlist = 0;
    my $tag = $self->get_tag_from_list($path, $self->{tags});
    if (defined $tag) {
        $inlist = 1;
    }
    if ($self->{options}{'tagsonly'} eq $inlist) {
        $usedefault = 0;
        if (defined $tag) {
            # get_tag_from_list returns "options<tag>...": keep what
            # precedes the first '<'.
            $options = $tag;
            $options =~ s/<.*$//;
        } else {
            if ($self->{options}{'wrap'}) {
                $options = "w";
            } else {
                $options = "W";
            }
        }
        $translate = 1;
    }

# TODO: a less precise set of tags should not override a more precise one
    # The tags and tagsonly options are deprecated.
    # The translated and untranslated options have a higher priority.
    $tag = $self->get_tag_from_list($path, $self->{translated});
    if (defined $tag) {
        $usedefault = 0;
        $options = $tag;
        $options =~ s/<.*$//;
        $translate = 1;
    }

    # A translated tag without an explicit w/W gets the wrapping chosen by
    # the 'wrap' option.
    if ($translate and $options !~ m/w/i) {
        $options .= ($self->{options}{'wrap'})?"w":"W";
    }

    # The 'untranslated' list is only consulted when the 'translated' list
    # did not match.
    if (not defined $tag) {
        $tag = $self->get_tag_from_list($path, $self->{untranslated});
        if (defined $tag) {
            $usedefault = 0;
            $options = "";
            $translate = 0;
        }
    }

    # 'inline' wins over 'placeholder': the latter is only looked up when
    # the former did not match.
    $tag = $self->get_tag_from_list($path, $self->{inline});
    if (defined $tag) {
        $usedefault = 0;
        $options .= "i";
    } else {
        $tag = $self->get_tag_from_list($path, $self->{placeholder});
        if (defined $tag) {
            $usedefault = 0;
            $options .= "p";
        }
    }

    if ($usedefault) {
        $options = $self->{options}{'defaulttranslateoption'};
    }

    # A translatable inline tag in an untranslated tag is treated as a
    # translatable breaking tag.
    if ($options =~ m/i/) {
        # The parent path is the path without its last <tag>; its options
        # are computed (and cached) recursively.
        my $ppath = $path;
        $ppath =~ s/<[^>]*>$//;
        my $poptions = $self->get_translate_options ($ppath);
        if ($poptions eq "") {
            $options =~ s/i//;
        }
    }

    if ($options =~ m/i/ and $self->{options}{'foldattributes'}) {
        $options .= "f";
    }

    $translate_options_cache{$path} = $options;
    return $options;
}


# Return the tag (or biggest set of tags) of a list which matches with the
# given path.
#
# The tag (or set of tags) is returned with its options.
#
# If no tags could match the path, undef is returned.
sub get_tag_from_list ($$$) {
    my ($self,$path,$list) = @_;
    if ($self->{options}{'caseinsensitive'}) {
        $path = lc $path;
    }

    # Try the whole path first, then retry with the outermost tag removed,
    # until an entry of $list matches or the path has no tag left.
    while (1) {
        if (defined $list->{$path}) {
            # The entry's value (its options) is put in front of the
            # matched path.
            return $list->{$path}.$path;
        }
        last unless ($path =~ m/</);
        $path =~ s/^<.*?>//;
    }

    return undef;
}



# Handle the text found between two breaking tags.
#
# Reads the text up to the next '<', then keeps absorbing the following
# tags (and the text after them) into @paragraph for as long as
# breaking_tag() reports a non-breaking tag. Comments are moved to
# @comments, placeholder tags open and close entries of @save_holders, and
# the tag path in @path is kept up to date. The collected paragraph is
# finally handed to translate_paragraph().
#
# Returns the end-of-input flag of the last read.
sub treat_content {
    my $self = shift;
    # Trailing blanks stripped from the paragraph; pushed after it.
    my $blank="";
    # Indicates if the paragraph will have to be translated
    my $translate = "";

    my ($eof,@paragraph)=$self->get_string_until('<',{remove=>1});

    while (!$eof and !$self->breaking_tag) {
    NEXT_TAG:
        my @text;
        my $type = $self->tag_type;
        my $f_extract = $tag_types[$type]->{'f_extract'};
        if (    defined($f_extract)
            and $f_extract eq \&tag_extract_comment) {
            # Remove the content of the comments
            ($eof, @text) = $self->extract_tag($type,1);
            # "\0" marks the end of this comment; translate_paragraph
            # looks for it when it splits @comments back into comments.
            $text[$#text-1] .= "\0";
            if ($tag_types[$type]->{'beginning'} eq "!--#") {
                $text[0] = "#".$text[0];
            }
            push @comments, @text;
        } else {
            # Peek at the tag (not removed) to learn its name ...
            my ($tmpeof, @tag) = $self->extract_tag($type,0);
            # Append the found inline tag
            # ... then consume its raw text, marks included.
            ($eof,@text)=$self->get_string_until('>',
                                                 {include=>1,
                                                  remove=>1,
                                                  unquoted=>1});
            # Append or remove the opening/closing tag from
            # the tag path
            if ($tag_types[$type]->{'end'} eq "") {
                if ($tag_types[$type]->{'beginning'} eq "") {
                    # Opening inline tag
                    my $cur_tag_name = $self->get_tag_name(@tag);
                    my $t_opts = $self->get_translate_options($self->get_path($cur_tag_name));
                    if ($t_opts =~ m/p/) {
                        # We enter a new holder.
                        # Append a <placeholder ...> tag to the current
                        # paragraph, and save the @paragraph in the
                        # current holder.
                        my $last_holder = $save_holders[$#save_holders];
                        # The placeholder id is the index its translation
                        # will get in the holder's sub_translations array.
                        my $placeholder_str = "<placeholder type=\"".$cur_tag_name."\" id=\"".($#{$last_holder->{'sub_translations'}}+1)."\"/>";
                        push @paragraph, ($placeholder_str, $text[1]);
                        my @saved_paragraph = @paragraph;

                        $last_holder->{'paragraph'} = \@saved_paragraph;

                        # Then we must push a new holder
                        my @new_paragraph = ();
                        my @sub_translations = ();
                        my %folded_attributes;
                        my %new_holder = ('paragraph' => \@new_paragraph,
                                          'open' => $text[0],
                                          'translation' => "",
                                          'close' => undef,
                                          'sub_translations' => \@sub_translations,
                                          'folded_attributes' => \%folded_attributes);
                        push @save_holders, \%new_holder;
                        @text = ();

                        # The current @paragraph
                        # (for the current holder)
                        # is empty.
                        @paragraph = ();
                    } elsif ($t_opts =~ m/f/) {
                        # Folded attributes: a tag that carries something
                        # after its name is stored in the holder and
                        # replaced by <name po4a-id=N>.
                        my $tag_full = $self->join_lines(@text);
                        my $tag_ref = $text[1];
                        if ($tag_full =~ m/^<\s*\S+\s+\S.*>$/s) {
                            my $holder = $save_holders[$#save_holders];
                            # Next free id: one more than the largest key.
                            my $id = 0;
                            foreach (keys %{$holder->{folded_attributes}}) {
                                $id = $_ + 1 if ($_ >= $id);
                            }
                            $holder->{folded_attributes}->{$id} = $tag_full;

                            @text = ("<$cur_tag_name po4a-id=$id>", $tag_ref);
                        }
                    }
                    push @path, $cur_tag_name;
                } elsif ($tag_types[$type]->{'beginning'} eq "/") {
                    # Closing inline tag

                    # Check if this is closing the
                    # last opening tag we detected.
                    my $test = pop @path;
                    my $name = $self->get_tag_name(@tag);
                    if (!defined($test) ||
                        $test ne $name ) {
                        my $ontagerror = $self->{options}{'ontagerror'};
                        if ($ontagerror eq "warn") {
                            warn wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong.  Continuing..."), $name);
                        } elsif ($ontagerror ne "silent") {
                            die wrap_ref_mod($tag[1], "po4a::xml", dgettext("po4a", "Unexpected closing tag </%s> found. The main document may be wrong."), $name);
                        }
                    }

                    if ($self->get_translate_options($self->get_path($self->get_tag_name(@tag))) =~ m/p/) {
                        # This closes the current holder.

                        # The tag was popped from @path above; put it back
                        # while the holder's content is translated.
                        push @path, $self->get_tag_name(@tag);
                        # Now translate this paragraph if needed.
                        # This will call pushline and append the
                        # translation to the current holder's translation.
                        $self->translate_paragraph(@paragraph);
                        pop @path;

                        # Now that this holder is closed, we can remove
                        # the holder from the stack.
                        my $holder = pop @save_holders;
                        # We need to keep the translation of this holder
                        my $translation = $holder->{'open'}.$holder->{'translation'}.$text[0];
                        # FIXME: @text could be multilines.

                        @text = ();

                        # Then we store the translation in the previous
                        # holder's sub_translations array
                        my $previous_holder = $save_holders[$#save_holders];
                        push @{$previous_holder->{'sub_translations'}}, $translation;
                        # We also need to restore the @paragraph array, as
                        # it was before we encountered the holder.
                        @paragraph = @{$previous_holder->{'paragraph'}};
                    }
                }
            }
            push @paragraph, @text;
        }

        # Next tag
        ($eof,@text)=$self->get_string_until('<',{remove=>1});
        if ($#text > 0) {
            # Check if text (extracted after the inline tag)
            # has to be translated
            push @paragraph, @text;
        }
    }

    # This strips the extracted strings
    # (only if you don't specify the 'nostrip' option, and if the
    # paragraph can be re-wrapped)
    $translate = $self->get_translate_options($self->get_path);
    if (!$self->{options}{'nostrip'} and $translate !~ m/W/) {
        my $clean = 0;
        # Clean the beginning
        # Leading blanks are pushed to the output right away.
        while (!$clean and $#paragraph > 0) {
            $paragraph[0] =~ /^(\s*)(.*)/s;
            my $match = $1;
            if ($paragraph[0] eq $match) {
                # The whole piece is blank: drop it and its reference.
                if ($match ne "") {
                    $self->pushline($match);
                }
                shift @paragraph;
                shift @paragraph;
            } else {
                $paragraph[0] = $2;
                if ($match ne "") {
                    $self->pushline($match);
                }
                $clean = 1;
            }
        }
        $clean = 0;
        # Clean the end
        # Trailing blanks are collected in $blank and pushed after the
        # translated paragraph.
        while (!$clean and $#paragraph > 0) {
            $paragraph[$#paragraph-1] =~ /^(.*?)(\s*)$/s;
            my $match = $2;
            if ($paragraph[$#paragraph-1] eq $match) {
                if ($match ne "") {
                    $blank = $match.$blank;
                }
                pop @paragraph;
                pop @paragraph;
            } else {
                $paragraph[$#paragraph-1] = $1;
                if ($match ne "") {
                    $blank = $match.$blank;
                }
                $clean = 1;
            }
        }
    }

    # Translate the string when needed
    # This will either push the translation in the translated document or
    # in the current holder translation.
    $self->translate_paragraph(@paragraph);

    # Push the trailing blanks
    if ($blank ne "") {
        $self->pushline($blank);
    }
    return $eof;
}

# Translate a @paragraph array of (string, reference).
# There is no $translate argument: whether the strings are translated or
# just pushed is decided here, from the translate options of the current
# path (an empty option string means "do not translate").
#
# Pending comments stored in @comments are written out first and attached
# to the translated string; afterwards the placeholders of the current
# holder are replaced by their translations when all of them are available.
sub translate_paragraph {
    my $self = shift;
    my @paragraph = @_;
    my $translate = $self->get_translate_options($self->get_path);

    # Pieces that start with a blank line are pushed as they are and
    # removed from the paragraph, together with their reference.
    while (    (scalar @paragraph)
           and ($paragraph[0] =~ m/^\s*\n/s)) {
        $self->pushline($paragraph[0]);
        shift @paragraph;
        shift @paragraph;
    }

    # Rebuild the pending comments. @comments holds (text, reference)
    # pairs; a text ending with "\0" is the last piece of a comment.
    my $comments;
    while (@comments) {
        my ($comment,$eoc);
        do {
            my ($t,$l) = (shift @comments, shift @comments);
            # Remove a trailing newline and the "\0" marker; $eoc is true
            # only when the marker was present.
            $t =~ s/\n?(\0)?$//;
            $eoc = $1;
            $comment .= "\n" if defined $comment;
            $comment .= $t;
        } until ($eoc);
        $comments .= "\n" if defined $comments;
        $comments .= $comment;
        $self->pushline("<!--".$comment."-->\n") if defined $comment;
    }
    @comments = ();

    if ($self->{options}{'cpp'}) {
        # Preprocessor directives are pushed untranslated; the text
        # gathered before each of them is translated separately.
        my @tmp = @paragraph;
        @paragraph = ();
        while (@tmp) {
            my ($t,$l) = (shift @tmp, shift @tmp);
            # #include can be followed by a filename between
            # <> brackets. In that case, the argument won't be
            # handled in the same call to translate_paragraph.
            # Thus do not try to match "include ".
            if ($t =~ m/^#[ \t]*(if |endif|undef |include|else|ifdef |ifndef |define )/si) {
                if (@paragraph) {
                    $self->translate_paragraph(@paragraph);
                    @paragraph = ();
                    $self->pushline("\n");
                }
                $self->pushline($t);
            } else {
                push @paragraph, ($t,$l);
            }
        }
    }

    my $para = $self->join_lines(@paragraph);
    if ( length($para) > 0 ) {
        if ($translate ne "") {
            # This tag should be translated
            $self->pushline($self->found_string(
                $para,
                $paragraph[1], {
                    type=>"tag",
                    tag_options=>$translate,
                    comments=>$comments
                }));
        } else {
            # Inform that this tag isn't translated in debug mode
            print wrap_ref_mod($paragraph[1], "po4a::xml", dgettext ("po4a", "Content of tag %s excluded: %s"), $self->get_path, $para)
                if $self->debug();
            $self->pushline($self->recode_skipped_text($para));
        }
    }
    # Now the paragraph is fully translated.
    # If we have all the holders' translation, we can replace the
    # placeholders by their translations.
    # We must wait to have all the translations because the holders are
    # numbered.
    {
        my $holder = $save_holders[$#save_holders];
        my $translation = $holder->{'translation'};

        # Count the number of <placeholder ...> in $translation
        my $count = 0;
        my $str = $translation;
        while (    (defined $str)
               and ($str =~ m/^.*?<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s)) {
            $count += 1;
            $str = $2;
            # A sub-translation that itself still contains a placeholder
            # is not final: -1 can never equal the array size tested below,
            # so the replacement is postponed.
            if ($holder->{'sub_translations'}->[$1] =~ m/<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>/s) {
                $count = -1;
                last;
            }
        }

        if (    (defined $translation)
            and (scalar(@{$holder->{'sub_translations'}}) == $count)) {
            # OK, all the holders of the current paragraph are
            # closed (and translated).
            # Replace them by their translation.
            while ($translation =~ m/^(.*?)<placeholder\s+type="[^"]+"\s+id="(\d+)"\s*\/>(.*)$/s) {
                # FIXME: we could also check that
                #          * the holder exists
                #          * all the holders are used
                $translation = $1.$holder->{'sub_translations'}->[$2].$3;
            }
            # We have our translation
            $holder->{'translation'} = $translation;
            # And there is no need for any holder in it.
            my @sub_translations = ();
            $holder->{'sub_translations'} = \@sub_translations;
        }
    }

}



=head2 WORKING WITH THE MODULE OPTIONS

=over 4

=item treat_options()

This function fills the internal structures that contain the tags, attributes
and inline data with the options of the module (specified in the command-line
or in the initialize function).
=back

=cut

# Split a whitespace-separated option value into its entries.
# An undefined or blank value gives an empty list.
sub _split_option_list {
    my ($value) = @_;
    return () unless defined $value;
    return split(' ', $value);
}

# Register the entries of the option $option in the $self->{$category} hash.
#
# Each entry has the form "OPTIONS<tag>..." and is stored as
# { "<tag>..." => "OPTIONS" }. Named arguments:
#   nodefault => \%hash  the list is a default list: entries named in %hash
#                        or already registered in the category are skipped
#   bare_ok   => 1       entries without any '<' are accepted and stored
#                        under their own text (used by 'attributes')
#
# An entry without '<' in any other list is reported and ignored. Using it
# would rely on capture variables left over from an earlier match and
# register a key that matches every path.
sub _register_tag_list {
    my ($self, $category, $option, %args) = @_;
    my $nodefault = $args{nodefault};

    foreach my $entry (_split_option_list($self->{options}{$option})) {
        my ($tag_options, $key);
        if ($entry =~ m/^(.*?)(<.*)$/) {
            ($tag_options, $key) = ($1 || "", $2);
        } elsif ($args{bare_ok}) {
            ($tag_options, $key) = ("", $entry);
        } else {
            warn wrap_mod("po4a::xml",
                          dgettext("po4a",
                                   "Ignoring entry '%s' of the '%s' option: it does not contain any <tag>."),
                          $entry, $option);
            next;
        }

        if (defined $nodefault) {
            next if $nodefault->{$key}
                 or defined $self->{$category}->{$key};
        }
        $self->{$category}->{$key} = $tag_options;
    }
    return;
}

# Fill $self->{nodefault}, {tags}, {break}, {translated}, {untranslated},
# {attributes}, {inline} and {placeholder} from the module options.
#
# For every category the user list is read first, then the matching
# "_default_" list, whose entries never override a user entry and are
# dropped when named in the 'nodefault' option. With 'caseinsensitive' all
# these option values are lower-cased first.
#
# Warns about the deprecated 'tags' and 'tagsonly' options. Dies when a tag
# is both translated and untranslated, or in more than one of the inline,
# break and placeholder categories.
sub treat_options {
    my $self = shift;

    if ($self->{options}{'caseinsensitive'}) {
        foreach my $option (qw(nodefault tags
                               break        _default_break
                               translated   _default_translated
                               untranslated _default_untranslated
                               attributes   _default_attributes
                               inline       _default_inline
                               placeholder  _default_placeholder)) {
            $self->{options}{$option} = lc $self->{options}{$option};
        }
    }

    my %list_nodefault =
        map { $_ => 1 } _split_option_list($self->{options}{'nodefault'});
    $self->{nodefault} = \%list_nodefault;

    if (length $self->{options}{'tags'}) {
        warn wrap_mod("po4a::xml",
                      dgettext("po4a",
                               "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tags");
    }
    _register_tag_list($self, 'tags', 'tags');

    if ($self->{options}{'tagsonly'}) {
        warn wrap_mod("po4a::xml",
                      dgettext("po4a",
                               "The '%s' option is deprecated. Please use the translated/untranslated and/or break/inline/placeholder categories."), "tagsonly");
    }

    foreach my $category (qw(break translated untranslated
                             attributes inline placeholder)) {
        # Only attribute lists may name a bare attribute without a tag.
        my $bare_ok = ($category eq 'attributes') ? 1 : 0;
        _register_tag_list($self, $category, $category,
                           bare_ok => $bare_ok);
        _register_tag_list($self, $category, "_default_$category",
                           bare_ok => $bare_ok,
                           nodefault => \%list_nodefault);
    }

    # There should be no translated and untranslated tags
    foreach my $tag (keys %{$self->{translated}}) {
        die wrap_mod("po4a::xml",
                     dgettext("po4a",
                              "Tag '%s' both in the %s and %s categories."), $tag, "translated", "untranslated")
            if defined $self->{untranslated}->{$tag};
    }
    # There should be no inline, break, and placeholder tags
    foreach my $tag (keys %{$self->{inline}}) {
        die wrap_mod("po4a::xml",
                     dgettext("po4a",
                              "Tag '%s' both in the %s and %s categories."), $tag, "inline", "break")
            if defined $self->{break}->{$tag};
        die wrap_mod("po4a::xml",
                     dgettext("po4a",
                              "Tag '%s' both in the %s and %s categories."), $tag, "inline", "placeholder")
            if defined $self->{placeholder}->{$tag};
    }
    foreach my $tag (keys %{$self->{break}}) {
        die wrap_mod("po4a::xml",
                     dgettext("po4a",
                              "Tag '%s' both in the %s and %s categories."), $tag, "break", "placeholder")
            if defined $self->{placeholder}->{$tag};
    }
}

=head2 GETTING TEXT FROM THE INPUT DOCUMENT

=over

=item get_string_until($%)

This function returns an array with the lines (and references) from the input
document until it finds the first argument.  The second argument is an options
hash. Value 0 means disabled (the default) and 1, enabled.

The valid options are:

=over 4

=item include

This makes the returned array to contain the searched text

=item remove

This removes the returned stream from the input

=item unquoted

This ensures that the searched text is outside any quotes

=item regex

This makes the first argument be used as a regular expression instead of a
literal string

=back

=cut

# Returns ($eof, @text): @text alternates lines and their references, and
# $eof is 1 when the input ran out before the search text was found.
sub get_string_until {
    my ($self,$search) = (shift,shift);
    my $options = shift;
    my ($include,$remove,$unquoted, $regex) = (0,0,0,0);

    if (defined($options->{include})) { $include = $options->{include}; }
    if (defined($options->{remove})) { $remove = $options->{remove}; }
    if (defined($options->{unquoted})) { $unquoted = $options->{unquoted}; }
    if (defined($options->{regex})) { $regex = $options->{regex}; }

    my ($line,$ref) = $self->shiftline();
    my (@text,$paragraph);
    my ($eof,$found) = (0,0);

    # Unless a regex was asked for, the search text is matched literally.
    $search = "\Q$search\E" unless $regex;
    # Accumulate input lines until the search text shows up in what has
    # been read so far.
    while (defined($line) and !$found) {
        push @text, ($line,$ref);
        $paragraph .= $line;
        if ($unquoted) {
            # Everything in front of the search text must be made of
            # complete "..." or '...' runs and of non-quote characters.
            if ( $paragraph =~ /^((\".*?\")|(\'.*?\')|[^\"\'])*$search/s ) {
                $found = 1;
            }
        } else {
            if ( $paragraph =~ /$search/s ) {
                $found = 1;
            }
        }
        if (!$found) {
            ($line,$ref)=$self->shiftline();
        }
    }

    if (!defined($line)) { $eof = 1; }

    if ( $found ) {
        # $line becomes what follows the search text; it is cut off the
        # last collected line and given back to the input below.
        $line = "";
        if($unquoted) {
            $paragraph =~ /^(?:(?:\".*?\")|(?:\'.*?\')|[^\"\'])*?$search(.*)$/s;
            $line = $1;
            $text[$#text-1] =~ s/\Q$line\E$//s;
        } else {
            $paragraph =~ /$search(.*)$/s;
            $line = $1;
            $text[$#text-1] =~ s/\Q$line\E$//s;
        }
        if(!$include) {
            # The search text itself is given back too. The leading (.*)
            # is greedy, so the last line is cut at the last place where
            # the search text matches in it.
            $text[$#text-1] =~ /^(.*)($search.*)$/s;
            $text[$#text-1] = $1;
            $line = $2.$line;
        }
        if (defined($line) and ($line ne "")) {
            # The remainder keeps the reference of the line it came from.
            $self->unshiftline ($line,$text[$#text]);
        }
    }
    if (!$remove) {
        # Non-destructive read: put everything back.
        $self->unshiftline (@text);
    }

    #If we get to the end of the file, we return the whole paragraph
    return ($eof,@text);
}

=item skip_spaces(\@)

This function receives as argument the reference to a paragraph (in the format
returned by get_string_until), skips its leading spaces and
returns them as
a simple string.

=cut

sub skip_spaces {
    my ($self, $pstring) = @_;
    my $skipped = "";

    while (@$pstring) {
        if ($pstring->[0] =~ /^(\s+)(.*)$/s) {
            # Move the leading blanks of the first piece to the result.
            $skipped .= $1;
            $pstring->[0] = $2;
        } elsif ($pstring->[0] ne "") {
            # The first piece starts with real content: nothing to skip.
            last;
        }

        # A piece that is (now) empty is dropped with its reference.
        if ($pstring->[0] eq "") {
            splice(@$pstring, 0, 2);
        }
    }
    return $skipped;
}

=item join_lines(@)

This function returns a simple string with the text from the argument array
(discarding the references).

=cut

sub join_lines {
    my ($self, @lines) = @_;
    my $joined = "";

    # The array alternates text and reference: keep every text that is
    # followed by a reference.
    for (my $index = 0; $index < $#lines; $index += 2) {
        $joined .= $lines[$index];
    }
    return $joined;
}

=back

=head1 STATUS OF THIS MODULE

This module can translate tags and attributes.

=head1 TODO LIST

DOCTYPE (ENTITIES)

There is a minimal support for the translation of entities. They are
translated as a whole, and tags are not taken into account. Multilines
entities are not supported and entities are always rewrapped during the
translation.

MODIFY TAG TYPES FROM INHERITED MODULES
(move the tag_types structure inside the $self hash?)

=head1 SEE ALSO

L<po4a(7)|po4a.7>, L<Locale::Po4a::TransTractor(3pm)|Locale::Po4a::TransTractor>.

=head1 AUTHORS

 Jordi Vilalta <jvprat@gmail.com>
 Nicolas François <nicolas.francois@centraliens.net>

=head1 COPYRIGHT AND LICENSE

 Copyright (c) 2004 by Jordi Vilalta  <jvprat@gmail.com>
 Copyright (c) 2008-2009 by Nicolas François <nicolas.francois@centraliens.net>

This program is free software; you may redistribute it and/or modify it
under the terms of GPL (see the COPYING file).

=cut

1;
#! /usr/bin/env perl
eval 'exec perl -S $0 ${1+"$@"}'
    if $running_under_some_shell;

# Diff header of this file in the changeset view:
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/tools/po4a/po4a-translate Thu Mar 12 15:43:56 2009 +0800
#   @@ -0,0 +1,257 @@

# po4a-translate -- translate doc files using a message catalog(ie, po file)
# $Id: po4a-translate,v 1.41 2009-03-07 12:33:10 nekral-guest Exp $
#
# Copyright 2002, 2003, 2004 by Martin Quinson (mquinson#debian.org)
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of GPL (see COPYING).

=head1 NAME

po4a-translate - convert a po file back to documentation format

=head1 SYNOPSIS

po4a-translate -f E<lt>fmtE<gt> -m E<lt>master.docE<gt> -p E<lt>XX.poE<gt> -l E<lt>XX.docE<gt>

(XX.doc is the output, all others are inputs)

=head1 DESCRIPTION

The po4a (po for anything) project goal is to ease translations (and more
interestingly, the maintenance of translations) using gettext tools on
areas where they were not expected like documentation.

The C<po4a-translate> script is in charge of converting the translation
(which was done in a po file) under the documentation format back. The
provided C<po> file should be the translation of the C<pot> file which were
produced by po4a-gettextize(1).

=head1 OPTIONS

=over 4

=item -f, --format

Format of the documentation you want to handle. Use the --help-format
option to see the list of available formats.

=item -a, --addendum

Add a file to the resulting file (to put translator's name or a section
"About this translation", for example). The first line of the file to insert
should be a PO4A header indicating where it should be added (see section
I<HOWTO add extra text to translations> in po4a(7)).

=item -A, --addendum-charset

Charset of the addenda. Note that all the addenda should be in the same
charset.

=item -m, --master

File containing the master document to translate.

=item -M, --master-charset

Charset of the file containing the document to translate.

=item -l, --localized

File where the localized (translated) document should be written.

=item -L, --localized-charset

Charset of the file containing the localized document.

=item -p, --po

File from which the message catalog should be read.

=item -o, --option

Extra option(s) to pass to the format plugin. Specify each option in the
'name=value' format. See the documentation of each plugin for more
information about the valid options and their meanings.

=item -k, --keep

Minimal threshold for translation percentage to keep (ie, write) the
resulting file (default: 80). Ie, by default, files have to be translated
at at least 80% to get written.

=item -w, --width

Column at which we should wrap the resulting file.

=item -h, --help

Show a short help message.

=item --help-format

List the documentation format understood by po4a.

=item -V, --version

Display the version of the script and exit.

=item -v, --verbose

Increase the verbosity of the program.

=item -d, --debug

Output some debugging information.

=back

=head1 Adding content (beside translations) to generated files

To add some extra content to the generated document beside what you
translated (like the name of the translator, or a "about this translation"
section), you should use the C<--addendum> option.

The first line of the addendum must be a header indicating where to put
it in the document (it can be before or after a given part of the
document). The rest of the file will be added verbatim to the resulting
file without further processing.

Note that if po4a-translate fails to add one of the given files, it discards
the whole translation (because the missing file could be the one indicating
the author, what would prevent the users to contact him to report bugs in
the translation).

The header has a pretty rigid syntax. For more information on how to use
this feature and how it works, please refer to the po4a(7) man page.

=head1 SEE ALSO

L<po4a(7)>, L<po4a-gettextize(1)>, L<po4a-updatepo(1)>, L<po4a-normalize(1)>.


=head1 AUTHORS

 Denis Barbier <barbier@linuxfr.org>
 Martin Quinson (mquinson#debian.org)

=head1 COPYRIGHT AND LICENSE

Copyright 2002, 2003, 2004 by SPI, inc.

This program is free software; you may redistribute it and/or modify it
under the terms of GPL (see the COPYING file).

=cut

use 5.006;
use strict;
use warnings;

use Locale::Po4a::Chooser;
use Locale::Po4a::TransTractor;
use Locale::Po4a::Common;

use Pod::Usage qw(pod2usage);
use Getopt::Long qw(GetOptions);

Locale::Po4a::Common::textdomain("po4a");

# Callback for --version: print the version banner and leave.
sub show_version {
    Locale::Po4a::Common::show_version("po4a-translate");
    exit 0;
}


# ---- Command line ---------------------------------------------------------

Getopt::Long::Configure('no_auto_abbrev', 'no_ignore_case');

my $outfile   = '-';
my $width     = 80;   # NOTE(review): filled by -w but never read below -- confirm intent.
my $threshold = 80;
my (@verbose, @addfiles, @plugin_opts);
my ($help, $help_fmt, $debug, $format);
my ($master_filename, $po_filename);
my ($mastchar, $locchar, $addchar);

GetOptions(
    'help|h'              => \$help,
    'help-format'         => \$help_fmt,

    'master|m=s'          => \$master_filename,
    'localized|l=s'       => \$outfile,
    'po|p=s'              => \$po_filename,
    'addendum|a=s'        => \@addfiles,
    'format|f=s'          => \$format,

    'master-charset|M=s'    => \$mastchar,
    'localized-charset|L=s' => \$locchar,
    'addendum-charset|A=s'  => \$addchar,

    'option|o=s'          => \@plugin_opts,

    'width|w=s'           => \$width,
    'verbose|v'           => \@verbose,   # one entry per -v given
    'debug|d'             => \$debug,
    'keep|k=s'            => \$threshold,

    'version|V'           => \&show_version
) or pod2usage();

pod2usage(-verbose => 1, -exitval => 0) if $help;
Locale::Po4a::Chooser::list(0) if $help_fmt;

# Both the master document and the po file are mandatory and must exist.
for my $required ($master_filename, $po_filename) {
    pod2usage() unless defined($required) && length($required);
}
for my $input ($master_filename, $po_filename) {
    die wrap_msg(gettext("File %s does not exist."), $input)
        unless -e $input;
}


# ---- Options handed to the format module ----------------------------------

my %options = (
    "verbose" => scalar(@verbose),
    "debug"   => $debug,
);

# Each -o argument is either "name=value" or a bare name (stored as 1).
for my $opt (@plugin_opts) {
    if ($opt =~ m/^([^=]*)=(.*)$/) {
        $options{$1} = "$2";
    }
    else {
        $options{$opt} = 1;
    }
}

# parser
my $doc = Locale::Po4a::Chooser::new($format, %options);


# ---- Translation ----------------------------------------------------------

# Prepare the document to be used as translator, but not parser
$doc->process(
    'po_in_name'       => [ $po_filename ],
    'file_in_name'     => [ $master_filename ],
    'file_in_charset'  => $mastchar,
    'file_out_charset' => $locchar,
    'addendum_charset' => $addchar,
);

my ($percent, $hit, $queries) = $doc->stats();

if ((scalar @verbose) && ($percent >= $threshold)) {
    print STDERR wrap_msg(gettext("%s is %s%% translated (%s of %s strings)."),
                          $master_filename, $percent, $hit, $queries);
}

if ($percent < $threshold) {
    # Not translated enough: report it and remove any existing output file.
    print STDERR wrap_msg(gettext("Discard the translation of %s (only %s%% translated; need %s%%)."),
                          $master_filename, $percent, $threshold);
    unlink $outfile if -e $outfile;
}
else {
    # Every addendum has to apply; a single failure discards the whole output.
    for my $add (@addfiles) {
        next if $doc->addendum($add);

        unlink $outfile if -e $outfile;
        die wrap_msg(gettext("Discard the translation of %s (addendum %s does not apply)."),
                     $master_filename, $add);
    }
    $doc->write($outfile);
}

1;
#! /usr/bin/env perl
eval 'exec perl -S $0 ${1+"$@"}'
    if $running_under_some_shell;

# Diff header of this file in the changeset view:
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/tools/po4a/po4a-updatepo Thu Mar 12 15:43:56 2009 +0800
#   @@ -0,0 +1,235 @@

# po4a-updatepo -- Update the po translation of documentation.
# $Id: po4a-updatepo,v 1.44 2009-03-07 12:33:10 nekral-guest Exp $
#
# Copyright 2002, 2003, 2004 by Martin Quinson (mquinson#debian.org)
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of GPL (see COPYING).

=head1 NAME

po4a-updatepo - update the translation (in po format) of documentation

=head1 SYNOPSIS

po4a-updatepo -f E<lt>fmtE<gt> (-m E<lt>master.docE<gt>)+ (-p E<lt>XX.poE<gt>)+

(XX.po are the outputs, all others are inputs)

=head1 DESCRIPTION

The po4a (po for anything) project goal is to ease translations (and more
interestingly, the maintenance of translations) using gettext tools on
areas where they were not expected like documentation.

The C<po4a-updatepo> script is in charge of updating po files to make
them reflect the changes made to the original documentation file. For that,
it converts the documentation file to a pot file, and calls L<msgmerge(1)>
on this new pot and on the provided po files.

It is possible to give more than one po file (if you want to update several
languages at once), and several documentation files (if you want to store
the translations of several documents in the same po file).

If the master document has non-ascii characters, it will convert the po files
to utf-8 (if they weren't already), in order to allow non-standard characters
in a culture independent way.

=head1 COMMAND-LINE OPTIONS

=over 4

=item -f, --format

Format of the documentation you want to handle. Use the --help-format
option to see the list of available formats.

=item -m, --master

File(s) containing the master document to translate.

=item -M, --master-charset

Charset of the files containing the document to translate. Note that all
files must have the same charset.

=item -p, --po

Po file(s) to update. If these files do not exist, they are created by
C<po4a-updatepo>.

=item -o, --option

Extra option(s) to pass to the format plugin and other po4a internal module.
Specify each option in the 'name=value' format. See the documentation of
each plugin for more information about the valid options and their meanings.

=item --previous

This option adds '--previous' to the options passed to msgmerge.
It requires gettext 0.16 or later.

=item --msgmerge-opt options

Extra options for msgmerge. The value is split into separate arguments
using shell-like quoting rules.

=item -h, --help

Show a short help message.

=item --help-format

List the documentation format handled by po4a.

=item -V, --version

Display the version of the script and exit.

=item -v, --verbose

Increase the verbosity of the program.

=item -d, --debug

Output some debugging information.

=back

=head1 SEE ALSO

L<po4a(7)>, L<po4a-gettextize(1)>, L<po4a-translate(1)>, L<po4a-normalize(1)>.

=head1 AUTHORS

 Denis Barbier <barbier@linuxfr.org>
 Martin Quinson (mquinson#debian.org)

=head1 COPYRIGHT AND LICENSE

Copyright 2002, 2003, 2004, 2005 by SPI, inc.

This program is free software; you may redistribute it and/or modify it
under the terms of GPL (see the COPYING file).

=cut

use 5.006;
use strict;
use warnings;

use Getopt::Long qw(GetOptions);
use Locale::Po4a::Po;

use Locale::Po4a::Chooser;
use Locale::Po4a::TransTractor;
use Locale::Po4a::Common;

use Pod::Usage qw(pod2usage);

use File::Spec;
use File::Temp qw(tempfile);
use Text::ParseWords qw(shellwords);

Locale::Po4a::Common::textdomain('po4a');

# Callback for --version: print the version banner and leave.
sub show_version {
    Locale::Po4a::Common::show_version("po4a-updatepo");
    exit 0;
}

# Describe why the last system() call failed. $! is only meaningful when the
# child could not be started ($? == -1); otherwise the reason is in $?.
sub child_failure_reason {
    return "$!" if $? == -1;
    return "signal " . ($? & 127) if $? & 127;
    return "exit status " . ($? >> 8);
}


# init commandline parser
Getopt::Long::Configure('bundling', 'no_getopt_compat', 'no_auto_abbrev');

# Parse our options
my (@masterfiles,@pofiles);
my ($help,$help_fmt,$verbose,$debug,$format,@options);
my $mastchar;
my $previous;
my $msgmerge_opt = "";
GetOptions('help|h'        => \$help,
           'help-format'   => \$help_fmt,

           'master|m=s'    => \@masterfiles,
           'po|p=s'        => \@pofiles,
           'format|f=s'    => \$format,

           'master-charset|M=s' => \$mastchar,

           'option|o=s'    => \@options,

           'previous'      => \$previous,
           'msgmerge-opt=s' => \$msgmerge_opt,

           'verbose|v'     => \$verbose,
           'debug|d'       => \$debug,
           'version|V'     => \&show_version)
    or pod2usage();

pod2usage(-verbose => 1, -exitval => 0) if $help;
Locale::Po4a::Chooser::list(0) if $help_fmt;
pod2usage() if scalar @masterfiles < 1 || scalar @pofiles < 1;

# Split the extra msgmerge options into words once, so that msgmerge can be
# run without a shell. shellwords() returns an empty list when the quoting
# is unbalanced, which must not be mistaken for "no options".
my @msgmerge_args = shellwords($msgmerge_opt);
die wrap_msg(gettext("Invalid value for option --msgmerge-opt: %s"), $msgmerge_opt)
    if $msgmerge_opt =~ /\S/ && !@msgmerge_args;
push @msgmerge_args, "--previous" if $previous;

my %options = (
    "verbose" => $verbose,
    "debug"   => $debug);

# Each -o argument is either "name=value" or a bare name (stored as 1).
foreach my $opt (@options) {
    if ($opt =~ m/^([^=]*)=(.*)$/) {
        $options{$1}="$2";
    } else {
        $options{$opt}=1;
    }
}

# parser
my ($doc)=Locale::Po4a::Chooser::new($format,%options);

foreach my $master (@masterfiles) {
    -e $master or die wrap_msg(gettext("File %s does not exist."), $master);
}
foreach my $po (@pofiles) {
    die wrap_msg(gettext("po4a-updatepo can't take the input po from stdin."))
        if $po eq '-' && !-e '-';
}

# Only a name is reserved here (OPEN => 0): the pot file itself is created
# later by process(), so that its permissions are the ones of a regular
# output file. tempfile() is a plain function and must not be called as a
# class method.
my $pot_filename;
(undef, $pot_filename) = eval {
    tempfile("po4a-updatepoXXXX",
             DIR    => File::Spec->tmpdir(),
             SUFFIX => ".pot",
             OPEN   => 0,
             UNLINK => 0);
};
die wrap_msg(gettext("Can't create a temporary pot file: %s"), $@ || $!)
    unless defined $pot_filename;

# Remove the temporary pot file however the script ends (normal end or die).
END {
    unlink($pot_filename) if defined $pot_filename && -e $pot_filename;
}


print STDERR wrap_msg(gettext("Parse input files... ")) if $verbose;

$doc->{TT}{utf_mode} = 1;

$doc->process('file_in_name'    => \@masterfiles,
              'file_in_charset' => $mastchar,
              'po_out_name'     => $pot_filename,
              'debug'           => $debug,
              'verbose'         => $verbose);

print STDERR wrap_msg(gettext("done.")) if $verbose;


# Iterate with foreach rather than "while (shift)": a po file named "0"
# would otherwise end the loop early.
foreach my $po_filename (@pofiles) {
    if (-e $po_filename) {
        print STDERR wrap_msg(gettext("Updating %s:"), $po_filename)
            if $verbose;
        # List form: file names are passed as-is, without shell parsing.
        system("msgmerge", @msgmerge_args, "-U", $po_filename, $pot_filename) == 0
            or die wrap_msg(gettext("Error while running msgmerge: %s"),
                            child_failure_reason());
        # Statistics are informational only; their exit status is ignored.
        system("msgfmt", "--statistics", "-v", "-o", "/dev/null", $po_filename)
            if $verbose;
    } else {
        print STDERR wrap_msg(gettext("Creating %s:"), $po_filename)
            if $verbose;
        system("cp", $pot_filename, $po_filename) == 0
            or die wrap_msg(gettext("Error while copying the po file: %s"),
                            child_failure_reason());
    }
}