c1a949cae5
svn: r6144
1357 lines
34 KiB
Plaintext
1357 lines
34 KiB
Plaintext
#!@INTLTOOL_PERL@ -w
|
|
# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
|
|
|
#
|
|
# The Intltool Message Merger
|
|
#
|
|
# Copyright (C) 2000, 2003 Free Software Foundation.
|
|
# Copyright (C) 2000, 2001 Eazel, Inc
|
|
#
|
|
# Intltool is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# version 2 published by the Free Software Foundation.
|
|
#
|
|
# Intltool is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
# General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
#
|
|
# As a special exception to the GNU General Public License, if you
|
|
# distribute this file as part of a program that contains a
|
|
# configuration script generated by Autoconf, you may include it under
|
|
# the same distribution terms that you use for the rest of that program.
|
|
#
|
|
# Authors: Maciej Stachowiak <mjs@noisehavoc.org>
|
|
# Kenneth Christiansen <kenneth@gnu.org>
|
|
# Darin Adler <darin@bentspoon.com>
|
|
#
|
|
# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
|
|
#
|
|
|
|
## Release information
|
|
my $PROGRAM = "intltool-merge";
|
|
my $PACKAGE = "intltool";
|
|
my $VERSION = "0.34.2";
|
|
|
|
## Loaded modules
|
|
use strict;
|
|
use Getopt::Long;
|
|
use Text::Wrap;
|
|
use File::Basename;
|
|
|
|
my $must_end_tag = -1;
|
|
my $last_depth = -1;
|
|
my $translation_depth = -1;
|
|
my @tag_stack = ();
|
|
my @entered_tag = ();
|
|
my @translation_strings = ();
|
|
my $leading_space = "";
|
|
|
|
## Scalars used by the option stuff
|
|
my $HELP_ARG = 0;
|
|
my $VERSION_ARG = 0;
|
|
my $BA_STYLE_ARG = 0;
|
|
my $XML_STYLE_ARG = 0;
|
|
my $KEYS_STYLE_ARG = 0;
|
|
my $DESKTOP_STYLE_ARG = 0;
|
|
my $SCHEMAS_STYLE_ARG = 0;
|
|
my $RFC822DEB_STYLE_ARG = 0;
|
|
my $QUIET_ARG = 0;
|
|
my $PASS_THROUGH_ARG = 0;
|
|
my $UTF8_ARG = 0;
|
|
my $MULTIPLE_OUTPUT = 0;
|
|
my $cache_file;
|
|
|
|
## Handle options
|
|
GetOptions
|
|
(
|
|
"help" => \$HELP_ARG,
|
|
"version" => \$VERSION_ARG,
|
|
"quiet|q" => \$QUIET_ARG,
|
|
"oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
|
|
"ba-style|b" => \$BA_STYLE_ARG,
|
|
"xml-style|x" => \$XML_STYLE_ARG,
|
|
"keys-style|k" => \$KEYS_STYLE_ARG,
|
|
"desktop-style|d" => \$DESKTOP_STYLE_ARG,
|
|
"schemas-style|s" => \$SCHEMAS_STYLE_ARG,
|
|
"rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
|
|
"pass-through|p" => \$PASS_THROUGH_ARG,
|
|
"utf8|u" => \$UTF8_ARG,
|
|
"multiple-output|m" => \$MULTIPLE_OUTPUT,
|
|
"cache|c=s" => \$cache_file
|
|
) or &error;
|
|
|
|
my $PO_DIR;
|
|
my $FILE;
|
|
my $OUTFILE;
|
|
|
|
my %po_files_by_lang = ();
|
|
my %translations = ();
|
|
my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "@INTLTOOL_ICONV@";
|
|
my $devnull = ($^O eq 'MSWin32' ? 'NUL:' : '/dev/null');
|
|
|
|
# Use this instead of \w for XML files to handle more possible characters.
|
|
my $w = "[-A-Za-z0-9._:]";
|
|
|
|
# XML quoted string contents
|
|
my $q = "[^\\\"]*";
|
|
|
|
## Check for options.
|
|
|
|
if ($VERSION_ARG)
|
|
{
|
|
&print_version;
|
|
}
|
|
elsif ($HELP_ARG)
|
|
{
|
|
&print_help;
|
|
}
|
|
elsif ($BA_STYLE_ARG && @ARGV > 2)
|
|
{
|
|
&utf8_sanity_check;
|
|
&preparation;
|
|
&print_message;
|
|
&ba_merge_translations;
|
|
&finalize;
|
|
}
|
|
elsif ($XML_STYLE_ARG && @ARGV > 2)
|
|
{
|
|
&utf8_sanity_check;
|
|
&preparation;
|
|
&print_message;
|
|
&xml_merge_output;
|
|
&finalize;
|
|
}
|
|
elsif ($KEYS_STYLE_ARG && @ARGV > 2)
|
|
{
|
|
&utf8_sanity_check;
|
|
&preparation;
|
|
&print_message;
|
|
&keys_merge_translations;
|
|
&finalize;
|
|
}
|
|
elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
|
|
{
|
|
&utf8_sanity_check;
|
|
&preparation;
|
|
&print_message;
|
|
&desktop_merge_translations;
|
|
&finalize;
|
|
}
|
|
elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
|
|
{
|
|
&utf8_sanity_check;
|
|
&preparation;
|
|
&print_message;
|
|
&schemas_merge_translations;
|
|
&finalize;
|
|
}
|
|
elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
|
|
{
|
|
&preparation;
|
|
&print_message;
|
|
&rfc822deb_merge_translations;
|
|
&finalize;
|
|
}
|
|
else
|
|
{
|
|
&print_help;
|
|
}
|
|
|
|
exit;
|
|
|
|
## Sub for printing release information
|
|
sub print_version
|
|
{
|
|
print <<_EOF_;
|
|
${PROGRAM} (${PACKAGE}) ${VERSION}
|
|
Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
|
|
|
|
Copyright (C) 2000-2003 Free Software Foundation, Inc.
|
|
Copyright (C) 2000-2001 Eazel, Inc.
|
|
This is free software; see the source for copying conditions. There is NO
|
|
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
_EOF_
|
|
exit;
|
|
}
|
|
|
|
## Sub for printing usage information
|
|
sub print_help
|
|
{
|
|
print <<_EOF_;
|
|
Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
|
|
Generates an output file that includes some localized attributes from an
|
|
untranslated source file.
|
|
|
|
Mandatory options: (exactly one must be specified)
|
|
-b, --ba-style includes translations in the bonobo-activation style
|
|
-d, --desktop-style includes translations in the desktop style
|
|
-k, --keys-style includes translations in the keys style
|
|
-s, --schemas-style includes translations in the schemas style
|
|
-r, --rfc822deb-style includes translations in the RFC822 style
|
|
-x, --xml-style includes translations in the standard xml style
|
|
|
|
Other options:
|
|
-u, --utf8 convert all strings to UTF-8 before merging
|
|
(default for everything except RFC822 style)
|
|
-p, --pass-through deprecated, does nothing and issues a warning
|
|
-m, --multiple-output output one localized file per locale, instead of
|
|
a single file containing all localized elements
|
|
-c, --cache=FILE specify cache file name
|
|
(usually \$top_builddir/po/.intltool-merge-cache)
|
|
-q, --quiet suppress most messages
|
|
--help display this help and exit
|
|
--version output version information and exit
|
|
|
|
Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
|
|
or send email to <xml-i18n-tools\@gnome.org>.
|
|
_EOF_
|
|
exit;
|
|
}
|
|
|
|
|
|
## Sub for printing error messages
|
|
sub print_error
|
|
{
|
|
print STDERR "Try `${PROGRAM} --help' for more information.\n";
|
|
exit;
|
|
}
|
|
|
|
|
|
sub print_message
|
|
{
|
|
print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
|
|
}
|
|
|
|
|
|
sub preparation
|
|
{
|
|
$PO_DIR = $ARGV[0];
|
|
$FILE = $ARGV[1];
|
|
$OUTFILE = $ARGV[2];
|
|
|
|
&gather_po_files;
|
|
&get_translation_database;
|
|
}
|
|
|
|
# General-purpose code for looking up translations in .po files
|
|
|
|
sub po_file2lang
|
|
{
|
|
my ($tmp) = @_;
|
|
$tmp =~ s/^.*\/(.*)\.po$/$1/;
|
|
return $tmp;
|
|
}
|
|
|
|
sub gather_po_files
|
|
{
|
|
for my $po_file (glob "$PO_DIR/*.po") {
|
|
$po_files_by_lang{po_file2lang($po_file)} = $po_file;
|
|
}
|
|
}
|
|
|
|
sub get_local_charset
|
|
{
|
|
my ($encoding) = @_;
|
|
my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "@INTLTOOL_LIBDIR@/charset.alias";
|
|
|
|
# seek character encoding aliases in charset.alias (glib)
|
|
|
|
if (open CHARSET_ALIAS, $alias_file)
|
|
{
|
|
while (<CHARSET_ALIAS>)
|
|
{
|
|
next if /^\#/;
|
|
return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
|
|
}
|
|
|
|
close CHARSET_ALIAS;
|
|
}
|
|
|
|
# if not found, return input string
|
|
|
|
return $encoding;
|
|
}
|
|
|
|
sub get_po_encoding
|
|
{
|
|
my ($in_po_file) = @_;
|
|
my $encoding = "";
|
|
|
|
open IN_PO_FILE, $in_po_file or die;
|
|
while (<IN_PO_FILE>)
|
|
{
|
|
## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
|
|
if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
|
|
{
|
|
$encoding = $1;
|
|
last;
|
|
}
|
|
}
|
|
close IN_PO_FILE;
|
|
|
|
if (!$encoding)
|
|
{
|
|
print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
|
|
$encoding = "ISO-8859-1";
|
|
}
|
|
|
|
system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull");
|
|
if ($?) {
|
|
$encoding = get_local_charset($encoding);
|
|
}
|
|
|
|
return $encoding
|
|
}
|
|
|
|
sub utf8_sanity_check
|
|
{
|
|
print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
|
|
$UTF8_ARG = 1;
|
|
}
|
|
|
|
sub get_translation_database
|
|
{
|
|
if ($cache_file) {
|
|
&get_cached_translation_database;
|
|
} else {
|
|
&create_translation_database;
|
|
}
|
|
}
|
|
|
|
sub get_newest_po_age
|
|
{
|
|
my $newest_age;
|
|
|
|
foreach my $file (values %po_files_by_lang)
|
|
{
|
|
my $file_age = -M $file;
|
|
$newest_age = $file_age if !$newest_age || $file_age < $newest_age;
|
|
}
|
|
|
|
$newest_age = 0 if !$newest_age;
|
|
|
|
return $newest_age;
|
|
}
|
|
|
|
sub create_cache
|
|
{
|
|
print "Generating and caching the translation database\n" unless $QUIET_ARG;
|
|
|
|
&create_translation_database;
|
|
|
|
open CACHE, ">$cache_file" || die;
|
|
print CACHE join "\x01", %translations;
|
|
close CACHE;
|
|
}
|
|
|
|
sub load_cache
|
|
{
|
|
print "Found cached translation database\n" unless $QUIET_ARG;
|
|
|
|
my $contents;
|
|
open CACHE, "<$cache_file" || die;
|
|
{
|
|
local $/;
|
|
$contents = <CACHE>;
|
|
}
|
|
close CACHE;
|
|
%translations = split "\x01", $contents;
|
|
}
|
|
|
|
sub get_cached_translation_database
|
|
{
|
|
my $cache_file_age = -M $cache_file;
|
|
if (defined $cache_file_age)
|
|
{
|
|
if ($cache_file_age <= &get_newest_po_age)
|
|
{
|
|
&load_cache;
|
|
return;
|
|
}
|
|
print "Found too-old cached translation database\n" unless $QUIET_ARG;
|
|
}
|
|
|
|
&create_cache;
|
|
}
|
|
|
|
sub create_translation_database
|
|
{
|
|
for my $lang (keys %po_files_by_lang)
|
|
{
|
|
my $po_file = $po_files_by_lang{$lang};
|
|
|
|
if ($UTF8_ARG)
|
|
{
|
|
my $encoding = get_po_encoding ($po_file);
|
|
|
|
if (lc $encoding eq "utf-8")
|
|
{
|
|
open PO_FILE, "<$po_file";
|
|
}
|
|
else
|
|
{
|
|
print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
|
|
|
|
open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
|
|
}
|
|
}
|
|
else
|
|
{
|
|
open PO_FILE, "<$po_file";
|
|
}
|
|
|
|
my $nextfuzzy = 0;
|
|
my $inmsgid = 0;
|
|
my $inmsgstr = 0;
|
|
my $msgid = "";
|
|
my $msgstr = "";
|
|
|
|
while (<PO_FILE>)
|
|
{
|
|
$nextfuzzy = 1 if /^#, fuzzy/;
|
|
|
|
if (/^msgid "((\\.|[^\\])*)"/ )
|
|
{
|
|
$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
|
|
$msgid = "";
|
|
$msgstr = "";
|
|
|
|
if ($nextfuzzy) {
|
|
$inmsgid = 0;
|
|
} else {
|
|
$msgid = unescape_po_string($1);
|
|
$inmsgid = 1;
|
|
}
|
|
$inmsgstr = 0;
|
|
$nextfuzzy = 0;
|
|
}
|
|
|
|
if (/^msgstr "((\\.|[^\\])*)"/)
|
|
{
|
|
$msgstr = unescape_po_string($1);
|
|
$inmsgstr = 1;
|
|
$inmsgid = 0;
|
|
}
|
|
|
|
if (/^"((\\.|[^\\])*)"/)
|
|
{
|
|
$msgid .= unescape_po_string($1) if $inmsgid;
|
|
$msgstr .= unescape_po_string($1) if $inmsgstr;
|
|
}
|
|
}
|
|
$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
|
|
}
|
|
}
|
|
|
|
sub finalize
|
|
{
|
|
}
|
|
|
|
sub unescape_one_sequence
|
|
{
|
|
my ($sequence) = @_;
|
|
|
|
return "\\" if $sequence eq "\\\\";
|
|
return "\"" if $sequence eq "\\\"";
|
|
return "\n" if $sequence eq "\\n";
|
|
return "\r" if $sequence eq "\\r";
|
|
return "\t" if $sequence eq "\\t";
|
|
return "\b" if $sequence eq "\\b";
|
|
return "\f" if $sequence eq "\\f";
|
|
return "\a" if $sequence eq "\\a";
|
|
return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)
|
|
|
|
return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
|
|
return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);
|
|
|
|
# FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489
|
|
|
|
return $sequence;
|
|
}
|
|
|
|
sub unescape_po_string
|
|
{
|
|
my ($string) = @_;
|
|
|
|
$string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;
|
|
|
|
return $string;
|
|
}
|
|
|
|
## NOTE: deal with < - < but not > - > because it seems its ok to have
|
|
## > in the entity. For further info please look at #84738.
|
|
sub entity_decode
|
|
{
|
|
local ($_) = @_;
|
|
|
|
s/'/'/g; # '
|
|
s/"/"/g; # "
|
|
s/&/&/g;
|
|
s/</</g;
|
|
|
|
return $_;
|
|
}
|
|
|
|
# entity_encode: (string)
|
|
#
|
|
# Encode the given string to XML format (encode '<' etc).
|
|
|
|
sub entity_encode
|
|
{
|
|
my ($pre_encoded) = @_;
|
|
|
|
my @list_of_chars = unpack ('C*', $pre_encoded);
|
|
|
|
# with UTF-8 we only encode minimalistic
|
|
return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
|
|
}
|
|
|
|
sub entity_encode_int_minimalist
|
|
{
|
|
return """ if $_ == 34;
|
|
return "&" if $_ == 38;
|
|
return "'" if $_ == 39;
|
|
return "<" if $_ == 60;
|
|
return chr $_;
|
|
}
|
|
|
|
sub entity_encoded_translation
|
|
{
|
|
my ($lang, $string) = @_;
|
|
|
|
my $translation = $translations{$lang, $string};
|
|
return $string if !$translation;
|
|
return entity_encode ($translation);
|
|
}
|
|
|
|
## XML (bonobo-activation specific) merge code
|
|
|
|
sub ba_merge_translations
|
|
{
|
|
my $source;
|
|
|
|
{
|
|
local $/; # slurp mode
|
|
open INPUT, "<$FILE" or die "can't open $FILE: $!";
|
|
$source = <INPUT>;
|
|
close INPUT;
|
|
}
|
|
|
|
open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
|
|
# Binmode so that selftest works ok if using a native Win32 Perl...
|
|
binmode (OUTPUT) if $^O eq 'MSWin32';
|
|
|
|
while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
|
|
{
|
|
print OUTPUT $1;
|
|
|
|
my $node = $2 . "\n";
|
|
|
|
my @strings = ();
|
|
$_ = $node;
|
|
while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
|
|
push @strings, entity_decode($3);
|
|
}
|
|
print OUTPUT;
|
|
|
|
my %langs;
|
|
for my $string (@strings)
|
|
{
|
|
for my $lang (keys %po_files_by_lang)
|
|
{
|
|
$langs{$lang} = 1 if $translations{$lang, $string};
|
|
}
|
|
}
|
|
|
|
for my $lang (sort keys %langs)
|
|
{
|
|
$_ = $node;
|
|
s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
|
|
s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
|
|
print OUTPUT;
|
|
}
|
|
}
|
|
|
|
print OUTPUT $source;
|
|
|
|
close OUTPUT;
|
|
}
|
|
|
|
|
|
## XML (non-bonobo-activation) merge code
|
|
|
|
|
|
# Process tag attributes
|
|
# Only parameter is a HASH containing attributes -> values mapping
|
|
sub getAttributeString
|
|
{
|
|
my $sub = shift;
|
|
my $do_translate = shift || 0;
|
|
my $language = shift || "";
|
|
my $result = "";
|
|
my $translate = shift;
|
|
foreach my $e (reverse(sort(keys %{ $sub }))) {
|
|
my $key = $e;
|
|
my $string = $sub->{$e};
|
|
my $quote = '"';
|
|
|
|
$string =~ s/^[\s]+//;
|
|
$string =~ s/[\s]+$//;
|
|
|
|
if ($string =~ /^'.*'$/)
|
|
{
|
|
$quote = "'";
|
|
}
|
|
$string =~ s/^['"]//g;
|
|
$string =~ s/['"]$//g;
|
|
|
|
if ($do_translate && $key =~ /^_/) {
|
|
$key =~ s|^_||g;
|
|
if ($language) {
|
|
# Handle translation
|
|
my $decode_string = entity_decode($string);
|
|
my $translation = $translations{$language, $decode_string};
|
|
if ($translation) {
|
|
$translation = entity_encode($translation);
|
|
$string = $translation;
|
|
}
|
|
$$translate = 2;
|
|
} else {
|
|
$$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate
|
|
}
|
|
}
|
|
|
|
$result .= " $key=$quote$string$quote";
|
|
}
|
|
return $result;
|
|
}
|
|
|
|
# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
|
|
sub getXMLstring
|
|
{
|
|
my $ref = shift;
|
|
my $spacepreserve = shift || 0;
|
|
my @list = @{ $ref };
|
|
my $result = "";
|
|
|
|
my $count = scalar(@list);
|
|
my $attrs = $list[0];
|
|
my $index = 1;
|
|
|
|
$spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
|
|
$spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
|
|
|
|
while ($index < $count) {
|
|
my $type = $list[$index];
|
|
my $content = $list[$index+1];
|
|
if (! $type ) {
|
|
# We've got CDATA
|
|
if ($content) {
|
|
# lets strip the whitespace here, and *ONLY* here
|
|
$content =~ s/\s+/ /gs if (!$spacepreserve);
|
|
$result .= $content;
|
|
}
|
|
} elsif ( "$type" ne "1" ) {
|
|
# We've got another element
|
|
$result .= "<$type";
|
|
$result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
|
|
if ($content) {
|
|
my $subresult = getXMLstring($content, $spacepreserve);
|
|
if ($subresult) {
|
|
$result .= ">".$subresult . "</$type>";
|
|
} else {
|
|
$result .= "/>";
|
|
}
|
|
} else {
|
|
$result .= "/>";
|
|
}
|
|
}
|
|
$index += 2;
|
|
}
|
|
return $result;
|
|
}
|
|
|
|
# Translate list of nodes if necessary
|
|
sub translate_subnodes
|
|
{
|
|
my $fh = shift;
|
|
my $content = shift;
|
|
my $language = shift || "";
|
|
my $singlelang = shift || 0;
|
|
my $spacepreserve = shift || 0;
|
|
|
|
my @nodes = @{ $content };
|
|
|
|
my $count = scalar(@nodes);
|
|
my $index = 0;
|
|
while ($index < $count) {
|
|
my $type = $nodes[$index];
|
|
my $rest = $nodes[$index+1];
|
|
if ($singlelang) {
|
|
my $oldMO = $MULTIPLE_OUTPUT;
|
|
$MULTIPLE_OUTPUT = 1;
|
|
traverse($fh, $type, $rest, $language, $spacepreserve);
|
|
$MULTIPLE_OUTPUT = $oldMO;
|
|
} else {
|
|
traverse($fh, $type, $rest, $language, $spacepreserve);
|
|
}
|
|
$index += 2;
|
|
}
|
|
}
|
|
|
|
sub isWellFormedXmlFragment
|
|
{
|
|
my $ret = eval 'require XML::Parser';
|
|
if(!$ret) {
|
|
die "You must have XML::Parser installed to run $0\n\n";
|
|
}
|
|
|
|
my $fragment = shift;
|
|
return 0 if (!$fragment);
|
|
|
|
$fragment = "<root>$fragment</root>";
|
|
my $xp = new XML::Parser(Style => 'Tree');
|
|
my $tree = 0;
|
|
eval { $tree = $xp->parse($fragment); };
|
|
return $tree;
|
|
}
|
|
|
|
sub traverse
|
|
{
|
|
my $fh = shift;
|
|
my $nodename = shift;
|
|
my $content = shift;
|
|
my $language = shift || "";
|
|
my $spacepreserve = shift || 0;
|
|
|
|
if (!$nodename) {
|
|
if ($content =~ /^[\s]*$/) {
|
|
$leading_space .= $content;
|
|
}
|
|
print $fh $content;
|
|
} else {
|
|
# element
|
|
my @all = @{ $content };
|
|
my $attrs = shift @all;
|
|
my $translate = 0;
|
|
my $outattr = getAttributeString($attrs, 1, $language, \$translate);
|
|
|
|
if ($nodename =~ /^_/) {
|
|
$translate = 1;
|
|
$nodename =~ s/^_//;
|
|
}
|
|
my $lookup = '';
|
|
|
|
$spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
|
|
$spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
|
|
|
|
print $fh "<$nodename", $outattr;
|
|
if ($translate) {
|
|
$lookup = getXMLstring($content, $spacepreserve);
|
|
if (!$spacepreserve) {
|
|
$lookup =~ s/^\s+//s;
|
|
$lookup =~ s/\s+$//s;
|
|
}
|
|
|
|
if ($lookup || $translate == 2) {
|
|
my $translation = $translations{$language, $lookup} if isWellFormedXmlFragment($translations{$language, $lookup});
|
|
if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
|
|
$translation = $lookup if (!$translation);
|
|
print $fh " xml:lang=\"", $language, "\"" if $language;
|
|
print $fh ">";
|
|
if ($translate == 2) {
|
|
translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
|
|
} else {
|
|
print $fh $translation;
|
|
}
|
|
print $fh "</$nodename>";
|
|
|
|
return; # this means there will be no same translation with xml:lang="$language"...
|
|
# if we want them both, just remove this "return"
|
|
} else {
|
|
print $fh ">";
|
|
if ($translate == 2) {
|
|
translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
|
|
} else {
|
|
print $fh $lookup;
|
|
}
|
|
print $fh "</$nodename>";
|
|
}
|
|
} else {
|
|
print $fh "/>";
|
|
}
|
|
|
|
for my $lang (sort keys %po_files_by_lang) {
|
|
if ($MULTIPLE_OUTPUT && $lang ne "$language") {
|
|
next;
|
|
}
|
|
if ($lang) {
|
|
# Handle translation
|
|
#
|
|
my $translate = 0;
|
|
my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
|
|
my $translation = $translations{$lang, $lookup} if isWellFormedXmlFragment($translations{$lang, $lookup});
|
|
if ($translate && !$translation) {
|
|
$translation = $lookup;
|
|
}
|
|
|
|
if ($translation || $translate) {
|
|
print $fh "\n";
|
|
$leading_space =~ s/.*\n//g;
|
|
print $fh $leading_space;
|
|
print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
|
|
if ($translate == 2) {
|
|
translate_subnodes($fh, \@all, $lang, 1, $spacepreserve);
|
|
} else {
|
|
print $fh $translation;
|
|
}
|
|
print $fh "</$nodename>";
|
|
}
|
|
}
|
|
}
|
|
|
|
} else {
|
|
my $count = scalar(@all);
|
|
if ($count > 0) {
|
|
print $fh ">";
|
|
my $index = 0;
|
|
while ($index < $count) {
|
|
my $type = $all[$index];
|
|
my $rest = $all[$index+1];
|
|
traverse($fh, $type, $rest, $language, $spacepreserve);
|
|
$index += 2;
|
|
}
|
|
print $fh "</$nodename>";
|
|
} else {
|
|
print $fh "/>";
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
sub intltool_tree_comment
|
|
{
|
|
my $expat = shift;
|
|
my $data = shift;
|
|
my $clist = $expat->{Curlist};
|
|
my $pos = $#$clist;
|
|
|
|
push @$clist, 1 => $data;
|
|
}
|
|
|
|
sub intltool_tree_cdatastart
|
|
{
|
|
my $expat = shift;
|
|
my $clist = $expat->{Curlist};
|
|
my $pos = $#$clist;
|
|
|
|
push @$clist, 0 => $expat->original_string();
|
|
}
|
|
|
|
sub intltool_tree_cdataend
|
|
{
|
|
my $expat = shift;
|
|
my $clist = $expat->{Curlist};
|
|
my $pos = $#$clist;
|
|
|
|
$clist->[$pos] .= $expat->original_string();
|
|
}
|
|
|
|
sub intltool_tree_char
|
|
{
|
|
my $expat = shift;
|
|
my $text = shift;
|
|
my $clist = $expat->{Curlist};
|
|
my $pos = $#$clist;
|
|
|
|
# Use original_string so that we retain escaped entities
|
|
# in CDATA sections.
|
|
#
|
|
if ($pos > 0 and $clist->[$pos - 1] eq '0') {
|
|
$clist->[$pos] .= $expat->original_string();
|
|
} else {
|
|
push @$clist, 0 => $expat->original_string();
|
|
}
|
|
}
|
|
|
|
sub intltool_tree_start
|
|
{
|
|
my $expat = shift;
|
|
my $tag = shift;
|
|
my @origlist = ();
|
|
|
|
# Use original_string so that we retain escaped entities
|
|
# in attribute values. We must convert the string to an
|
|
# @origlist array to conform to the structure of the Tree
|
|
# Style.
|
|
#
|
|
my @original_array = split /\x/, $expat->original_string();
|
|
my $source = $expat->original_string();
|
|
|
|
# Remove leading tag.
|
|
#
|
|
$source =~ s|^\s*<\s*(\S+)||s;
|
|
|
|
# Grab attribute key/value pairs and push onto @origlist array.
|
|
#
|
|
while ($source)
|
|
{
|
|
if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
|
|
{
|
|
$source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
|
|
push @origlist, $1;
|
|
push @origlist, '"' . $2 . '"';
|
|
}
|
|
elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
|
|
{
|
|
$source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
|
|
push @origlist, $1;
|
|
push @origlist, "'" . $2 . "'";
|
|
}
|
|
else
|
|
{
|
|
last;
|
|
}
|
|
}
|
|
|
|
my $ol = [ { @origlist } ];
|
|
|
|
push @{ $expat->{Lists} }, $expat->{Curlist};
|
|
push @{ $expat->{Curlist} }, $tag => $ol;
|
|
$expat->{Curlist} = $ol;
|
|
}
|
|
|
|
sub readXml
|
|
{
|
|
my $filename = shift || return;
|
|
if(!-f $filename) {
|
|
die "ERROR Cannot find filename: $filename\n";
|
|
}
|
|
|
|
my $ret = eval 'require XML::Parser';
|
|
if(!$ret) {
|
|
die "You must have XML::Parser installed to run $0\n\n";
|
|
}
|
|
my $xp = new XML::Parser(Style => 'Tree');
|
|
$xp->setHandlers(Char => \&intltool_tree_char);
|
|
$xp->setHandlers(Start => \&intltool_tree_start);
|
|
$xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
|
|
$xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
|
|
my $tree = $xp->parsefile($filename);
|
|
|
|
# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
|
|
# would be:
|
|
# [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{},
|
|
# 0, "Howdy", ref, [{}]], 0, "do" ] ]
|
|
|
|
return $tree;
|
|
}
|
|
|
|
sub print_header
|
|
{
|
|
my $infile = shift;
|
|
my $fh = shift;
|
|
my $source;
|
|
|
|
if(!-f $infile) {
|
|
die "ERROR Cannot find filename: $infile\n";
|
|
}
|
|
|
|
print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
|
|
{
|
|
local $/;
|
|
open DOCINPUT, "<${FILE}" or die;
|
|
$source = <DOCINPUT>;
|
|
close DOCINPUT;
|
|
}
|
|
if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
|
|
{
|
|
print $fh "$1\n";
|
|
}
|
|
elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
|
|
{
|
|
print $fh "$1\n";
|
|
}
|
|
}
|
|
|
|
sub parseTree
|
|
{
|
|
my $fh = shift;
|
|
my $ref = shift;
|
|
my $language = shift || "";
|
|
|
|
my $name = shift @{ $ref };
|
|
my $cont = shift @{ $ref };
|
|
|
|
while (!$name || "$name" eq "1") {
|
|
$name = shift @{ $ref };
|
|
$cont = shift @{ $ref };
|
|
}
|
|
|
|
my $spacepreserve = 0;
|
|
my $attrs = @{$cont}[0];
|
|
$spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
|
|
|
|
traverse($fh, $name, $cont, $language, $spacepreserve);
|
|
}
|
|
|
|
sub xml_merge_output
|
|
{
|
|
my $source;
|
|
|
|
if ($MULTIPLE_OUTPUT) {
|
|
for my $lang (sort keys %po_files_by_lang) {
|
|
if ( ! -e $lang ) {
|
|
mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
|
|
}
|
|
open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
|
|
binmode (OUTPUT) if $^O eq 'MSWin32';
|
|
my $tree = readXml($FILE);
|
|
print_header($FILE, \*OUTPUT);
|
|
parseTree(\*OUTPUT, $tree, $lang);
|
|
close OUTPUT;
|
|
print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
|
|
}
|
|
}
|
|
open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
|
|
binmode (OUTPUT) if $^O eq 'MSWin32';
|
|
my $tree = readXml($FILE);
|
|
print_header($FILE, \*OUTPUT);
|
|
parseTree(\*OUTPUT, $tree);
|
|
close OUTPUT;
|
|
print "CREATED $OUTFILE\n" unless $QUIET_ARG;
|
|
}
|
|
|
|
sub keys_merge_translations
|
|
{
|
|
open INPUT, "<${FILE}" or die;
|
|
open OUTPUT, ">${OUTFILE}" or die;
|
|
binmode (OUTPUT) if $^O eq 'MSWin32';
|
|
|
|
while (<INPUT>)
|
|
{
|
|
if (s/^(\s*)_(\w+=(.*))/$1$2/)
|
|
{
|
|
my $string = $3;
|
|
|
|
print OUTPUT;
|
|
|
|
my $non_translated_line = $_;
|
|
|
|
for my $lang (sort keys %po_files_by_lang)
|
|
{
|
|
my $translation = $translations{$lang, $string};
|
|
next if !$translation;
|
|
|
|
$_ = $non_translated_line;
|
|
s/(\w+)=.*/[$lang]$1=$translation/;
|
|
print OUTPUT;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
print OUTPUT;
|
|
}
|
|
}
|
|
|
|
close OUTPUT;
|
|
close INPUT;
|
|
}
|
|
|
|
sub desktop_merge_translations
|
|
{
|
|
open INPUT, "<${FILE}" or die;
|
|
open OUTPUT, ">${OUTFILE}" or die;
|
|
binmode (OUTPUT) if $^O eq 'MSWin32';
|
|
|
|
while (<INPUT>)
|
|
{
|
|
if (s/^(\s*)_(\w+=(.*))/$1$2/)
|
|
{
|
|
my $string = $3;
|
|
|
|
print OUTPUT;
|
|
|
|
my $non_translated_line = $_;
|
|
|
|
for my $lang (sort keys %po_files_by_lang)
|
|
{
|
|
my $translation = $translations{$lang, $string};
|
|
next if !$translation;
|
|
|
|
$_ = $non_translated_line;
|
|
s/(\w+)=.*/${1}[$lang]=$translation/;
|
|
print OUTPUT;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
print OUTPUT;
|
|
}
|
|
}
|
|
|
|
close OUTPUT;
|
|
close INPUT;
|
|
}
|
|
|
|
sub schemas_merge_translations
|
|
{
|
|
my $source;
|
|
|
|
{
|
|
local $/; # slurp mode
|
|
open INPUT, "<$FILE" or die "can't open $FILE: $!";
|
|
$source = <INPUT>;
|
|
close INPUT;
|
|
}
|
|
|
|
open OUTPUT, ">$OUTFILE" or die;
|
|
binmode (OUTPUT) if $^O eq 'MSWin32';
|
|
|
|
# FIXME: support attribute translations
|
|
|
|
# Empty nodes never need translation, so unmark all of them.
|
|
# For example, <_foo/> is just replaced by <foo/>.
|
|
$source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
|
|
|
|
while ($source =~ s/
|
|
(.*?)
|
|
(\s+)(<locale\ name="C">(\s*)
|
|
(<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
|
|
(<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
|
|
(<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
|
|
<\/locale>)
|
|
//sx)
|
|
{
|
|
print OUTPUT $1;
|
|
|
|
my $locale_start_spaces = $2 ? $2 : '';
|
|
my $default_spaces = $4 ? $4 : '';
|
|
my $short_spaces = $7 ? $7 : '';
|
|
my $long_spaces = $10 ? $10 : '';
|
|
my $locale_end_spaces = $13 ? $13 : '';
|
|
my $c_default_block = $3 ? $3 : '';
|
|
my $default_string = $6 ? $6 : '';
|
|
my $short_string = $9 ? $9 : '';
|
|
my $long_string = $12 ? $12 : '';
|
|
|
|
print OUTPUT "$locale_start_spaces$c_default_block";
|
|
|
|
$default_string =~ s/\s+/ /g;
|
|
$default_string = entity_decode($default_string);
|
|
$short_string =~ s/\s+/ /g;
|
|
$short_string = entity_decode($short_string);
|
|
$long_string =~ s/\s+/ /g;
|
|
$long_string = entity_decode($long_string);
|
|
|
|
for my $lang (sort keys %po_files_by_lang)
|
|
{
|
|
my $default_translation = $translations{$lang, $default_string};
|
|
my $short_translation = $translations{$lang, $short_string};
|
|
my $long_translation = $translations{$lang, $long_string};
|
|
|
|
next if (!$default_translation && !$short_translation &&
|
|
!$long_translation);
|
|
|
|
print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
|
|
|
|
print OUTPUT "$default_spaces";
|
|
|
|
if ($default_translation)
|
|
{
|
|
$default_translation = entity_encode($default_translation);
|
|
print OUTPUT "<default>$default_translation</default>";
|
|
}
|
|
|
|
print OUTPUT "$short_spaces";
|
|
|
|
if ($short_translation)
|
|
{
|
|
$short_translation = entity_encode($short_translation);
|
|
print OUTPUT "<short>$short_translation</short>";
|
|
}
|
|
|
|
print OUTPUT "$long_spaces";
|
|
|
|
if ($long_translation)
|
|
{
|
|
$long_translation = entity_encode($long_translation);
|
|
print OUTPUT "<long>$long_translation</long>";
|
|
}
|
|
|
|
print OUTPUT "$locale_end_spaces</locale>";
|
|
}
|
|
}
|
|
|
|
print OUTPUT $source;
|
|
|
|
close OUTPUT;
|
|
}
|
|
|
|
sub rfc822deb_merge_translations
|
|
{
|
|
my %encodings = ();
|
|
for my $lang (keys %po_files_by_lang) {
|
|
$encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
|
|
}
|
|
|
|
my $source;
|
|
|
|
$Text::Wrap::huge = 'overflow';
|
|
$Text::Wrap::break = qr/\n|\s(?=\S)/;
|
|
|
|
{
|
|
local $/; # slurp mode
|
|
open INPUT, "<$FILE" or die "can't open $FILE: $!";
|
|
$source = <INPUT>;
|
|
close INPUT;
|
|
}
|
|
|
|
open OUTPUT, ">${OUTFILE}" or die;
|
|
binmode (OUTPUT) if $^O eq 'MSWin32';
|
|
|
|
while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
|
|
{
|
|
my $sep = $1;
|
|
my $non_translated_line = $3.$4;
|
|
my $string = $5;
|
|
my $underscore = length($2);
|
|
next if $underscore eq 0 && $non_translated_line =~ /^#/;
|
|
# Remove [] dummy strings
|
|
my $stripped = $string;
|
|
$stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
|
|
$stripped =~ s/\[\s[^\[\]]*\]$//;
|
|
$non_translated_line .= $stripped;
|
|
|
|
print OUTPUT $sep.$non_translated_line;
|
|
|
|
if ($underscore)
|
|
{
|
|
my @str_list = rfc822deb_split($underscore, $string);
|
|
|
|
for my $lang (sort keys %po_files_by_lang)
|
|
{
|
|
my $is_translated = 1;
|
|
my $str_translated = '';
|
|
my $first = 1;
|
|
|
|
for my $str (@str_list)
|
|
{
|
|
my $translation = $translations{$lang, $str};
|
|
|
|
if (!$translation)
|
|
{
|
|
$is_translated = 0;
|
|
last;
|
|
}
|
|
|
|
# $translation may also contain [] dummy
|
|
# strings, mostly to indicate an empty string
|
|
$translation =~ s/\[\s[^\[\]]*\]$//;
|
|
|
|
if ($first)
|
|
{
|
|
if ($underscore eq 2)
|
|
{
|
|
$str_translated .= $translation;
|
|
}
|
|
else
|
|
{
|
|
$str_translated .=
|
|
Text::Tabs::expand($translation) .
|
|
"\n";
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if ($underscore eq 2)
|
|
{
|
|
$str_translated .= ', ' . $translation;
|
|
}
|
|
else
|
|
{
|
|
$str_translated .= Text::Tabs::expand(
|
|
Text::Wrap::wrap(' ', ' ', $translation)) .
|
|
"\n .\n";
|
|
}
|
|
}
|
|
$first = 0;
|
|
|
|
# To fix some problems with Text::Wrap::wrap
|
|
$str_translated =~ s/(\n )+\n/\n .\n/g;
|
|
}
|
|
next unless $is_translated;
|
|
|
|
$str_translated =~ s/\n \.\n$//;
|
|
$str_translated =~ s/\s+$//;
|
|
|
|
$_ = $non_translated_line;
|
|
s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
|
|
print OUTPUT;
|
|
}
|
|
}
|
|
}
|
|
print OUTPUT "\n";
|
|
|
|
close OUTPUT;
|
|
close INPUT;
|
|
}
|
|
|
|
sub rfc822deb_split
|
|
{
|
|
# Debian defines a special way to deal with rfc822-style files:
|
|
# when a value contain newlines, it consists of
|
|
# 1. a short form (first line)
|
|
# 2. a long description, all lines begin with a space,
|
|
# and paragraphs are separated by a single dot on a line
|
|
# This routine returns an array of all paragraphs, and reformat
|
|
# them.
|
|
# When first argument is 2, the string is a comma separated list of
|
|
# values.
|
|
my $type = shift;
|
|
my $text = shift;
|
|
$text =~ s/^[ \t]//mg;
|
|
return (split(/, */, $text, 0)) if $type ne 1;
|
|
return ($text) if $text !~ /\n/;
|
|
|
|
$text =~ s/([^\n]*)\n//;
|
|
my @list = ($1);
|
|
my $str = '';
|
|
|
|
for my $line (split (/\n/, $text))
|
|
{
|
|
chomp $line;
|
|
if ($line =~ /^\.\s*$/)
|
|
{
|
|
# New paragraph
|
|
$str =~ s/\s*$//;
|
|
push(@list, $str);
|
|
$str = '';
|
|
}
|
|
elsif ($line =~ /^\s/)
|
|
{
|
|
# Line which must not be reformatted
|
|
$str .= "\n" if length ($str) && $str !~ /\n$/;
|
|
$line =~ s/\s+$//;
|
|
$str .= $line."\n";
|
|
}
|
|
else
|
|
{
|
|
# Continuation line, remove newline
|
|
$str .= " " if length ($str) && $str !~ /\n$/;
|
|
$str .= $line;
|
|
}
|
|
}
|
|
|
|
$str =~ s/\s*$//;
|
|
push(@list, $str) if length ($str);
|
|
|
|
return @list;
|
|
}
|
|
|