You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1404 lines
36KB

  1. #!/usr/bin/perl -w
  2. # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  3. #
  4. # The Intltool Message Merger
  5. #
  6. # Copyright (C) 2000, 2003 Free Software Foundation.
  7. # Copyright (C) 2000, 2001 Eazel, Inc
  8. #
  9. # Intltool is free software; you can redistribute it and/or
  10. # modify it under the terms of the GNU General Public License
  11. # version 2 published by the Free Software Foundation.
  12. #
  13. # Intltool is distributed in the hope that it will be useful,
  14. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. # General Public License for more details.
  17. #
  18. # You should have received a copy of the GNU General Public License
  19. # along with this program; if not, write to the Free Software
  20. # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21. #
  22. # As a special exception to the GNU General Public License, if you
  23. # distribute this file as part of a program that contains a
  24. # configuration script generated by Autoconf, you may include it under
  25. # the same distribution terms that you use for the rest of that program.
  26. #
  27. # Authors: Maciej Stachowiak <mjs@noisehavoc.org>
  28. # Kenneth Christiansen <kenneth@gnu.org>
  29. # Darin Adler <darin@bentspoon.com>
  30. #
  31. # Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
  32. #
  33. ## Release information
  34. my $PROGRAM = "intltool-merge";
  35. my $PACKAGE = "intltool";
  36. my $VERSION = "0.35.5";
  37. ## Loaded modules
  38. use strict;
  39. use Getopt::Long;
  40. use Text::Wrap;
  41. use File::Basename;
  42. my $must_end_tag = -1;
  43. my $last_depth = -1;
  44. my $translation_depth = -1;
  45. my @tag_stack = ();
  46. my @entered_tag = ();
  47. my @translation_strings = ();
  48. my $leading_space = "";
  49. ## Scalars used by the option stuff
  50. my $HELP_ARG = 0;
  51. my $VERSION_ARG = 0;
  52. my $BA_STYLE_ARG = 0;
  53. my $XML_STYLE_ARG = 0;
  54. my $KEYS_STYLE_ARG = 0;
  55. my $DESKTOP_STYLE_ARG = 0;
  56. my $SCHEMAS_STYLE_ARG = 0;
  57. my $RFC822DEB_STYLE_ARG = 0;
  58. my $QUOTED_STYLE_ARG = 0;
  59. my $QUIET_ARG = 0;
  60. my $PASS_THROUGH_ARG = 0;
  61. my $UTF8_ARG = 0;
  62. my $MULTIPLE_OUTPUT = 0;
  63. my $cache_file;
  64. ## Handle options
  65. GetOptions
  66. (
  67. "help" => \$HELP_ARG,
  68. "version" => \$VERSION_ARG,
  69. "quiet|q" => \$QUIET_ARG,
  70. "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
  71. "ba-style|b" => \$BA_STYLE_ARG,
  72. "xml-style|x" => \$XML_STYLE_ARG,
  73. "keys-style|k" => \$KEYS_STYLE_ARG,
  74. "desktop-style|d" => \$DESKTOP_STYLE_ARG,
  75. "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
  76. "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
  77. "quoted-style" => \$QUOTED_STYLE_ARG,
  78. "pass-through|p" => \$PASS_THROUGH_ARG,
  79. "utf8|u" => \$UTF8_ARG,
  80. "multiple-output|m" => \$MULTIPLE_OUTPUT,
  81. "cache|c=s" => \$cache_file
  82. ) or &error;
  83. my $PO_DIR;
  84. my $FILE;
  85. my $OUTFILE;
  86. my %po_files_by_lang = ();
  87. my %translations = ();
  88. my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "/usr/bin/iconv";
  89. my $devnull = ($^O eq 'MSWin32' ? 'NUL:' : '/dev/null');
  90. # Use this instead of \w for XML files to handle more possible characters.
  91. my $w = "[-A-Za-z0-9._:]";
  92. # XML quoted string contents
  93. my $q = "[^\\\"]*";
  94. ## Check for options.
  95. if ($VERSION_ARG)
  96. {
  97. &print_version;
  98. }
  99. elsif ($HELP_ARG)
  100. {
  101. &print_help;
  102. }
  103. elsif ($BA_STYLE_ARG && @ARGV > 2)
  104. {
  105. &utf8_sanity_check;
  106. &preparation;
  107. &print_message;
  108. &ba_merge_translations;
  109. &finalize;
  110. }
  111. elsif ($XML_STYLE_ARG && @ARGV > 2)
  112. {
  113. &utf8_sanity_check;
  114. &preparation;
  115. &print_message;
  116. &xml_merge_output;
  117. &finalize;
  118. }
  119. elsif ($KEYS_STYLE_ARG && @ARGV > 2)
  120. {
  121. &utf8_sanity_check;
  122. &preparation;
  123. &print_message;
  124. &keys_merge_translations;
  125. &finalize;
  126. }
  127. elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
  128. {
  129. &utf8_sanity_check;
  130. &preparation;
  131. &print_message;
  132. &desktop_merge_translations;
  133. &finalize;
  134. }
  135. elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
  136. {
  137. &utf8_sanity_check;
  138. &preparation;
  139. &print_message;
  140. &schemas_merge_translations;
  141. &finalize;
  142. }
  143. elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
  144. {
  145. &preparation;
  146. &print_message;
  147. &rfc822deb_merge_translations;
  148. &finalize;
  149. }
  150. elsif ($QUOTED_STYLE_ARG && @ARGV > 2)
  151. {
  152. &utf8_sanity_check;
  153. &preparation;
  154. &print_message;
  155. &quoted_merge_translations;
  156. &finalize;
  157. }
  158. else
  159. {
  160. &print_help;
  161. }
  162. exit;
  163. ## Sub for printing release information
  164. sub print_version
  165. {
  166. print <<_EOF_;
  167. ${PROGRAM} (${PACKAGE}) ${VERSION}
  168. Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
  169. Copyright (C) 2000-2003 Free Software Foundation, Inc.
  170. Copyright (C) 2000-2001 Eazel, Inc.
  171. This is free software; see the source for copying conditions. There is NO
  172. warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  173. _EOF_
  174. exit;
  175. }
  176. ## Sub for printing usage information
  177. sub print_help
  178. {
  179. print <<_EOF_;
  180. Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
  181. Generates an output file that includes some localized attributes from an
  182. untranslated source file.
  183. Mandatory options: (exactly one must be specified)
  184. -b, --ba-style includes translations in the bonobo-activation style
  185. -d, --desktop-style includes translations in the desktop style
  186. -k, --keys-style includes translations in the keys style
  187. -s, --schemas-style includes translations in the schemas style
  188. -r, --rfc822deb-style includes translations in the RFC822 style
  189. --quoted-style includes translations in the quoted string style
  190. -x, --xml-style includes translations in the standard xml style
  191. Other options:
  192. -u, --utf8 convert all strings to UTF-8 before merging
  193. (default for everything except RFC822 style)
  194. -p, --pass-through deprecated, does nothing and issues a warning
  195. -m, --multiple-output output one localized file per locale, instead of
  196. a single file containing all localized elements
  197. -c, --cache=FILE specify cache file name
  198. (usually \$top_builddir/po/.intltool-merge-cache)
  199. -q, --quiet suppress most messages
  200. --help display this help and exit
  201. --version output version information and exit
  202. Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
  203. or send email to <xml-i18n-tools\@gnome.org>.
  204. _EOF_
  205. exit;
  206. }
  207. ## Sub for printing error messages
  208. sub print_error
  209. {
  210. print STDERR "Try `${PROGRAM} --help' for more information.\n";
  211. exit;
  212. }
  213. sub print_message
  214. {
  215. print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
  216. }
  217. sub preparation
  218. {
  219. $PO_DIR = $ARGV[0];
  220. $FILE = $ARGV[1];
  221. $OUTFILE = $ARGV[2];
  222. &gather_po_files;
  223. &get_translation_database;
  224. }
  225. # General-purpose code for looking up translations in .po files
  226. sub po_file2lang
  227. {
  228. my ($tmp) = @_;
  229. $tmp =~ s/^.*\/(.*)\.po$/$1/;
  230. return $tmp;
  231. }
  232. sub gather_po_files
  233. {
  234. for my $po_file (glob "$PO_DIR/*.po") {
  235. $po_files_by_lang{po_file2lang($po_file)} = $po_file;
  236. }
  237. }
  238. sub get_local_charset
  239. {
  240. my ($encoding) = @_;
  241. my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias";
  242. # seek character encoding aliases in charset.alias (glib)
  243. if (open CHARSET_ALIAS, $alias_file)
  244. {
  245. while (<CHARSET_ALIAS>)
  246. {
  247. next if /^\#/;
  248. return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
  249. }
  250. close CHARSET_ALIAS;
  251. }
  252. # if not found, return input string
  253. return $encoding;
  254. }
  255. sub get_po_encoding
  256. {
  257. my ($in_po_file) = @_;
  258. my $encoding = "";
  259. open IN_PO_FILE, $in_po_file or die;
  260. while (<IN_PO_FILE>)
  261. {
  262. ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
  263. if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
  264. {
  265. $encoding = $1;
  266. last;
  267. }
  268. }
  269. close IN_PO_FILE;
  270. if (!$encoding)
  271. {
  272. print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
  273. $encoding = "ISO-8859-1";
  274. }
  275. system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull");
  276. if ($?) {
  277. $encoding = get_local_charset($encoding);
  278. }
  279. return $encoding
  280. }
  281. sub utf8_sanity_check
  282. {
  283. print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
  284. $UTF8_ARG = 1;
  285. }
  286. sub get_translation_database
  287. {
  288. if ($cache_file) {
  289. &get_cached_translation_database;
  290. } else {
  291. &create_translation_database;
  292. }
  293. }
  294. sub get_newest_po_age
  295. {
  296. my $newest_age;
  297. foreach my $file (values %po_files_by_lang)
  298. {
  299. my $file_age = -M $file;
  300. $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
  301. }
  302. $newest_age = 0 if !$newest_age;
  303. return $newest_age;
  304. }
  305. sub create_cache
  306. {
  307. print "Generating and caching the translation database\n" unless $QUIET_ARG;
  308. &create_translation_database;
  309. open CACHE, ">$cache_file" || die;
  310. print CACHE join "\x01", %translations;
  311. close CACHE;
  312. }
  313. sub load_cache
  314. {
  315. print "Found cached translation database\n" unless $QUIET_ARG;
  316. my $contents;
  317. open CACHE, "<$cache_file" || die;
  318. {
  319. local $/;
  320. $contents = <CACHE>;
  321. }
  322. close CACHE;
  323. %translations = split "\x01", $contents;
  324. }
  325. sub get_cached_translation_database
  326. {
  327. my $cache_file_age = -M $cache_file;
  328. if (defined $cache_file_age)
  329. {
  330. if ($cache_file_age <= &get_newest_po_age)
  331. {
  332. &load_cache;
  333. return;
  334. }
  335. print "Found too-old cached translation database\n" unless $QUIET_ARG;
  336. }
  337. &create_cache;
  338. }
  339. sub create_translation_database
  340. {
  341. for my $lang (keys %po_files_by_lang)
  342. {
  343. my $po_file = $po_files_by_lang{$lang};
  344. if ($UTF8_ARG)
  345. {
  346. my $encoding = get_po_encoding ($po_file);
  347. if (lc $encoding eq "utf-8")
  348. {
  349. open PO_FILE, "<$po_file";
  350. }
  351. else
  352. {
  353. print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
  354. open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
  355. }
  356. }
  357. else
  358. {
  359. open PO_FILE, "<$po_file";
  360. }
  361. my $nextfuzzy = 0;
  362. my $inmsgid = 0;
  363. my $inmsgstr = 0;
  364. my $msgid = "";
  365. my $msgstr = "";
  366. while (<PO_FILE>)
  367. {
  368. $nextfuzzy = 1 if /^#, fuzzy/;
  369. if (/^msgid "((\\.|[^\\]+)*)"/ )
  370. {
  371. $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr && $msgid ne $msgstr;
  372. $msgid = "";
  373. $msgstr = "";
  374. if ($nextfuzzy) {
  375. $inmsgid = 0;
  376. } else {
  377. $msgid = unescape_po_string($1);
  378. $inmsgid = 1;
  379. }
  380. $inmsgstr = 0;
  381. $nextfuzzy = 0;
  382. }
  383. if (/^msgstr "((\\.|[^\\]+)*)"/)
  384. {
  385. $msgstr = unescape_po_string($1);
  386. $inmsgstr = 1;
  387. $inmsgid = 0;
  388. }
  389. if (/^"((\\.|[^\\]+)*)"/)
  390. {
  391. $msgid .= unescape_po_string($1) if $inmsgid;
  392. $msgstr .= unescape_po_string($1) if $inmsgstr;
  393. }
  394. }
  395. $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr && $msgstr ne $msgid;
  396. }
  397. }
  398. sub finalize
  399. {
  400. }
  401. sub unescape_one_sequence
  402. {
  403. my ($sequence) = @_;
  404. return "\\" if $sequence eq "\\\\";
  405. return "\"" if $sequence eq "\\\"";
  406. return "\n" if $sequence eq "\\n";
  407. return "\r" if $sequence eq "\\r";
  408. return "\t" if $sequence eq "\\t";
  409. return "\b" if $sequence eq "\\b";
  410. return "\f" if $sequence eq "\\f";
  411. return "\a" if $sequence eq "\\a";
  412. return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)
  413. return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
  414. return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);
  415. # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489
  416. return $sequence;
  417. }
  418. sub unescape_po_string
  419. {
  420. my ($string) = @_;
  421. $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;
  422. return $string;
  423. }
  424. ## NOTE: deal with < - &lt; but not > - &gt; because it seems its ok to have
  425. ## > in the entity. For further info please look at #84738.
  426. sub entity_decode
  427. {
  428. local ($_) = @_;
  429. s/&apos;/'/g; # '
  430. s/&quot;/"/g; # "
  431. s/&amp;/&/g;
  432. s/&lt;/</g;
  433. return $_;
  434. }
  435. # entity_encode: (string)
  436. #
  437. # Encode the given string to XML format (encode '<' etc).
  438. sub entity_encode
  439. {
  440. my ($pre_encoded) = @_;
  441. my @list_of_chars = unpack ('C*', $pre_encoded);
  442. # with UTF-8 we only encode minimalistic
  443. return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
  444. }
  445. sub entity_encode_int_minimalist
  446. {
  447. return "&quot;" if $_ == 34;
  448. return "&amp;" if $_ == 38;
  449. return "&apos;" if $_ == 39;
  450. return "&lt;" if $_ == 60;
  451. return chr $_;
  452. }
  453. sub entity_encoded_translation
  454. {
  455. my ($lang, $string) = @_;
  456. my $translation = $translations{$lang, $string};
  457. return $string if !$translation;
  458. return entity_encode ($translation);
  459. }
  460. ## XML (bonobo-activation specific) merge code
  461. sub ba_merge_translations
  462. {
  463. my $source;
  464. {
  465. local $/; # slurp mode
  466. open INPUT, "<$FILE" or die "can't open $FILE: $!";
  467. $source = <INPUT>;
  468. close INPUT;
  469. }
  470. open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
  471. # Binmode so that selftest works ok if using a native Win32 Perl...
  472. binmode (OUTPUT) if $^O eq 'MSWin32';
  473. while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
  474. {
  475. print OUTPUT $1;
  476. my $node = $2 . "\n";
  477. my @strings = ();
  478. $_ = $node;
  479. while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
  480. push @strings, entity_decode($3);
  481. }
  482. print OUTPUT;
  483. my %langs;
  484. for my $string (@strings)
  485. {
  486. for my $lang (keys %po_files_by_lang)
  487. {
  488. $langs{$lang} = 1 if $translations{$lang, $string};
  489. }
  490. }
  491. for my $lang (sort keys %langs)
  492. {
  493. $_ = $node;
  494. s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
  495. s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
  496. print OUTPUT;
  497. }
  498. }
  499. print OUTPUT $source;
  500. close OUTPUT;
  501. }
  502. ## XML (non-bonobo-activation) merge code
  503. # Process tag attributes
  504. # Only parameter is a HASH containing attributes -> values mapping
  505. sub getAttributeString
  506. {
  507. my $sub = shift;
  508. my $do_translate = shift || 0;
  509. my $language = shift || "";
  510. my $result = "";
  511. my $translate = shift;
  512. foreach my $e (reverse(sort(keys %{ $sub }))) {
  513. my $key = $e;
  514. my $string = $sub->{$e};
  515. my $quote = '"';
  516. $string =~ s/^[\s]+//;
  517. $string =~ s/[\s]+$//;
  518. if ($string =~ /^'.*'$/)
  519. {
  520. $quote = "'";
  521. }
  522. $string =~ s/^['"]//g;
  523. $string =~ s/['"]$//g;
  524. if ($do_translate && $key =~ /^_/) {
  525. $key =~ s|^_||g;
  526. if ($language) {
  527. # Handle translation
  528. my $decode_string = entity_decode($string);
  529. my $translation = $translations{$language, $decode_string};
  530. if ($translation) {
  531. $translation = entity_encode($translation);
  532. $string = $translation;
  533. }
  534. $$translate = 2;
  535. } else {
  536. $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate
  537. }
  538. }
  539. $result .= " $key=$quote$string$quote";
  540. }
  541. return $result;
  542. }
  543. # Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
  544. sub getXMLstring
  545. {
  546. my $ref = shift;
  547. my $spacepreserve = shift || 0;
  548. my @list = @{ $ref };
  549. my $result = "";
  550. my $count = scalar(@list);
  551. my $attrs = $list[0];
  552. my $index = 1;
  553. $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
  554. $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
  555. while ($index < $count) {
  556. my $type = $list[$index];
  557. my $content = $list[$index+1];
  558. if (! $type ) {
  559. # We've got CDATA
  560. if ($content) {
  561. # lets strip the whitespace here, and *ONLY* here
  562. $content =~ s/\s+/ /gs if (!$spacepreserve);
  563. $result .= $content;
  564. }
  565. } elsif ( "$type" ne "1" ) {
  566. # We've got another element
  567. $result .= "<$type";
  568. $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
  569. if ($content) {
  570. my $subresult = getXMLstring($content, $spacepreserve);
  571. if ($subresult) {
  572. $result .= ">".$subresult . "</$type>";
  573. } else {
  574. $result .= "/>";
  575. }
  576. } else {
  577. $result .= "/>";
  578. }
  579. }
  580. $index += 2;
  581. }
  582. return $result;
  583. }
  584. # Translate list of nodes if necessary
  585. sub translate_subnodes
  586. {
  587. my $fh = shift;
  588. my $content = shift;
  589. my $language = shift || "";
  590. my $singlelang = shift || 0;
  591. my $spacepreserve = shift || 0;
  592. my @nodes = @{ $content };
  593. my $count = scalar(@nodes);
  594. my $index = 0;
  595. while ($index < $count) {
  596. my $type = $nodes[$index];
  597. my $rest = $nodes[$index+1];
  598. if ($singlelang) {
  599. my $oldMO = $MULTIPLE_OUTPUT;
  600. $MULTIPLE_OUTPUT = 1;
  601. traverse($fh, $type, $rest, $language, $spacepreserve);
  602. $MULTIPLE_OUTPUT = $oldMO;
  603. } else {
  604. traverse($fh, $type, $rest, $language, $spacepreserve);
  605. }
  606. $index += 2;
  607. }
  608. }
  609. sub isWellFormedXmlFragment
  610. {
  611. my $ret = eval 'require XML::Parser';
  612. if(!$ret) {
  613. die "You must have XML::Parser installed to run $0\n\n";
  614. }
  615. my $fragment = shift;
  616. return 0 if (!$fragment);
  617. $fragment = "<root>$fragment</root>";
  618. my $xp = new XML::Parser(Style => 'Tree');
  619. my $tree = 0;
  620. eval { $tree = $xp->parse($fragment); };
  621. return $tree;
  622. }
  623. sub traverse
  624. {
  625. my $fh = shift;
  626. my $nodename = shift;
  627. my $content = shift;
  628. my $language = shift || "";
  629. my $spacepreserve = shift || 0;
  630. if (!$nodename) {
  631. if ($content =~ /^[\s]*$/) {
  632. $leading_space .= $content;
  633. }
  634. print $fh $content;
  635. } else {
  636. # element
  637. my @all = @{ $content };
  638. my $attrs = shift @all;
  639. my $translate = 0;
  640. my $outattr = getAttributeString($attrs, 1, $language, \$translate);
  641. if ($nodename =~ /^_/) {
  642. $translate = 1;
  643. $nodename =~ s/^_//;
  644. }
  645. my $lookup = '';
  646. $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
  647. $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
  648. print $fh "<$nodename", $outattr;
  649. if ($translate) {
  650. $lookup = getXMLstring($content, $spacepreserve);
  651. if (!$spacepreserve) {
  652. $lookup =~ s/^\s+//s;
  653. $lookup =~ s/\s+$//s;
  654. }
  655. if ($lookup || $translate == 2) {
  656. my $translation = $translations{$language, $lookup} if isWellFormedXmlFragment($translations{$language, $lookup});
  657. if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
  658. $translation = $lookup if (!$translation);
  659. print $fh " xml:lang=\"", $language, "\"" if $language;
  660. print $fh ">";
  661. if ($translate == 2) {
  662. translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
  663. } else {
  664. print $fh $translation;
  665. }
  666. print $fh "</$nodename>";
  667. return; # this means there will be no same translation with xml:lang="$language"...
  668. # if we want them both, just remove this "return"
  669. } else {
  670. print $fh ">";
  671. if ($translate == 2) {
  672. translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
  673. } else {
  674. print $fh $lookup;
  675. }
  676. print $fh "</$nodename>";
  677. }
  678. } else {
  679. print $fh "/>";
  680. }
  681. for my $lang (sort keys %po_files_by_lang) {
  682. if ($MULTIPLE_OUTPUT && $lang ne "$language") {
  683. next;
  684. }
  685. if ($lang) {
  686. # Handle translation
  687. #
  688. my $translate = 0;
  689. my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
  690. my $translation = $translations{$lang, $lookup} if isWellFormedXmlFragment($translations{$lang, $lookup});
  691. if ($translate && !$translation) {
  692. $translation = $lookup;
  693. }
  694. if ($translation || $translate) {
  695. print $fh "\n";
  696. $leading_space =~ s/.*\n//g;
  697. print $fh $leading_space;
  698. print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
  699. if ($translate == 2) {
  700. translate_subnodes($fh, \@all, $lang, 1, $spacepreserve);
  701. } else {
  702. print $fh $translation;
  703. }
  704. print $fh "</$nodename>";
  705. }
  706. }
  707. }
  708. } else {
  709. my $count = scalar(@all);
  710. if ($count > 0) {
  711. print $fh ">";
  712. my $index = 0;
  713. while ($index < $count) {
  714. my $type = $all[$index];
  715. my $rest = $all[$index+1];
  716. traverse($fh, $type, $rest, $language, $spacepreserve);
  717. $index += 2;
  718. }
  719. print $fh "</$nodename>";
  720. } else {
  721. print $fh "/>";
  722. }
  723. }
  724. }
  725. }
  726. sub intltool_tree_comment
  727. {
  728. my $expat = shift;
  729. my $data = shift;
  730. my $clist = $expat->{Curlist};
  731. my $pos = $#$clist;
  732. push @$clist, 1 => $data;
  733. }
  734. sub intltool_tree_cdatastart
  735. {
  736. my $expat = shift;
  737. my $clist = $expat->{Curlist};
  738. my $pos = $#$clist;
  739. push @$clist, 0 => $expat->original_string();
  740. }
  741. sub intltool_tree_cdataend
  742. {
  743. my $expat = shift;
  744. my $clist = $expat->{Curlist};
  745. my $pos = $#$clist;
  746. $clist->[$pos] .= $expat->original_string();
  747. }
  748. sub intltool_tree_char
  749. {
  750. my $expat = shift;
  751. my $text = shift;
  752. my $clist = $expat->{Curlist};
  753. my $pos = $#$clist;
  754. # Use original_string so that we retain escaped entities
  755. # in CDATA sections.
  756. #
  757. if ($pos > 0 and $clist->[$pos - 1] eq '0') {
  758. $clist->[$pos] .= $expat->original_string();
  759. } else {
  760. push @$clist, 0 => $expat->original_string();
  761. }
  762. }
  763. sub intltool_tree_start
  764. {
  765. my $expat = shift;
  766. my $tag = shift;
  767. my @origlist = ();
  768. # Use original_string so that we retain escaped entities
  769. # in attribute values. We must convert the string to an
  770. # @origlist array to conform to the structure of the Tree
  771. # Style.
  772. #
  773. my @original_array = split /\x/, $expat->original_string();
  774. my $source = $expat->original_string();
  775. # Remove leading tag.
  776. #
  777. $source =~ s|^\s*<\s*(\S+)||s;
  778. # Grab attribute key/value pairs and push onto @origlist array.
  779. #
  780. while ($source)
  781. {
  782. if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
  783. {
  784. $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
  785. push @origlist, $1;
  786. push @origlist, '"' . $2 . '"';
  787. }
  788. elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
  789. {
  790. $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
  791. push @origlist, $1;
  792. push @origlist, "'" . $2 . "'";
  793. }
  794. else
  795. {
  796. last;
  797. }
  798. }
  799. my $ol = [ { @origlist } ];
  800. push @{ $expat->{Lists} }, $expat->{Curlist};
  801. push @{ $expat->{Curlist} }, $tag => $ol;
  802. $expat->{Curlist} = $ol;
  803. }
  804. sub readXml
  805. {
  806. my $filename = shift || return;
  807. if(!-f $filename) {
  808. die "ERROR Cannot find filename: $filename\n";
  809. }
  810. my $ret = eval 'require XML::Parser';
  811. if(!$ret) {
  812. die "You must have XML::Parser installed to run $0\n\n";
  813. }
  814. my $xp = new XML::Parser(Style => 'Tree');
  815. $xp->setHandlers(Char => \&intltool_tree_char);
  816. $xp->setHandlers(Start => \&intltool_tree_start);
  817. $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
  818. $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
  819. my $tree = $xp->parsefile($filename);
  820. # <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
  821. # would be:
  822. # [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{},
  823. # 0, "Howdy", ref, [{}]], 0, "do" ] ]
  824. return $tree;
  825. }
  826. sub print_header
  827. {
  828. my $infile = shift;
  829. my $fh = shift;
  830. my $source;
  831. if(!-f $infile) {
  832. die "ERROR Cannot find filename: $infile\n";
  833. }
  834. print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
  835. {
  836. local $/;
  837. open DOCINPUT, "<${FILE}" or die;
  838. $source = <DOCINPUT>;
  839. close DOCINPUT;
  840. }
  841. if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
  842. {
  843. print $fh "$1\n";
  844. }
  845. elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
  846. {
  847. print $fh "$1\n";
  848. }
  849. }
  850. sub parseTree
  851. {
  852. my $fh = shift;
  853. my $ref = shift;
  854. my $language = shift || "";
  855. my $name = shift @{ $ref };
  856. my $cont = shift @{ $ref };
  857. while (!$name || "$name" eq "1") {
  858. $name = shift @{ $ref };
  859. $cont = shift @{ $ref };
  860. }
  861. my $spacepreserve = 0;
  862. my $attrs = @{$cont}[0];
  863. $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
  864. traverse($fh, $name, $cont, $language, $spacepreserve);
  865. }
  866. sub xml_merge_output
  867. {
  868. my $source;
  869. if ($MULTIPLE_OUTPUT) {
  870. for my $lang (sort keys %po_files_by_lang) {
  871. if ( ! -d $lang ) {
  872. mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n";
  873. }
  874. open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
  875. binmode (OUTPUT) if $^O eq 'MSWin32';
  876. my $tree = readXml($FILE);
  877. print_header($FILE, \*OUTPUT);
  878. parseTree(\*OUTPUT, $tree, $lang);
  879. close OUTPUT;
  880. print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
  881. }
  882. }
  883. open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
  884. binmode (OUTPUT) if $^O eq 'MSWin32';
  885. my $tree = readXml($FILE);
  886. print_header($FILE, \*OUTPUT);
  887. parseTree(\*OUTPUT, $tree);
  888. close OUTPUT;
  889. print "CREATED $OUTFILE\n" unless $QUIET_ARG;
  890. }
  891. sub keys_merge_translations
  892. {
  893. open INPUT, "<${FILE}" or die;
  894. open OUTPUT, ">${OUTFILE}" or die;
  895. binmode (OUTPUT) if $^O eq 'MSWin32';
  896. while (<INPUT>)
  897. {
  898. if (s/^(\s*)_(\w+=(.*))/$1$2/)
  899. {
  900. my $string = $3;
  901. print OUTPUT;
  902. my $non_translated_line = $_;
  903. for my $lang (sort keys %po_files_by_lang)
  904. {
  905. my $translation = $translations{$lang, $string};
  906. next if !$translation;
  907. $_ = $non_translated_line;
  908. s/(\w+)=.*/[$lang]$1=$translation/;
  909. print OUTPUT;
  910. }
  911. }
  912. else
  913. {
  914. print OUTPUT;
  915. }
  916. }
  917. close OUTPUT;
  918. close INPUT;
  919. }
  920. sub desktop_merge_translations
  921. {
  922. open INPUT, "<${FILE}" or die;
  923. open OUTPUT, ">${OUTFILE}" or die;
  924. binmode (OUTPUT) if $^O eq 'MSWin32';
  925. while (<INPUT>)
  926. {
  927. if (s/^(\s*)_(\w+=(.*))/$1$2/)
  928. {
  929. my $string = $3;
  930. print OUTPUT;
  931. my $non_translated_line = $_;
  932. for my $lang (sort keys %po_files_by_lang)
  933. {
  934. my $translation = $translations{$lang, $string};
  935. next if !$translation;
  936. $_ = $non_translated_line;
  937. s/(\w+)=.*/${1}[$lang]=$translation/;
  938. print OUTPUT;
  939. }
  940. }
  941. else
  942. {
  943. print OUTPUT;
  944. }
  945. }
  946. close OUTPUT;
  947. close INPUT;
  948. }
  949. sub schemas_merge_translations
  950. {
  951. my $source;
  952. {
  953. local $/; # slurp mode
  954. open INPUT, "<$FILE" or die "can't open $FILE: $!";
  955. $source = <INPUT>;
  956. close INPUT;
  957. }
  958. open OUTPUT, ">$OUTFILE" or die;
  959. binmode (OUTPUT) if $^O eq 'MSWin32';
  960. # FIXME: support attribute translations
  961. # Empty nodes never need translation, so unmark all of them.
  962. # For example, <_foo/> is just replaced by <foo/>.
  963. $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
  964. while ($source =~ s/
  965. (.*?)
  966. (\s+)(<locale\ name="C">(\s*)
  967. (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
  968. (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
  969. (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
  970. <\/locale>)
  971. //sx)
  972. {
  973. print OUTPUT $1;
  974. my $locale_start_spaces = $2 ? $2 : '';
  975. my $default_spaces = $4 ? $4 : '';
  976. my $short_spaces = $7 ? $7 : '';
  977. my $long_spaces = $10 ? $10 : '';
  978. my $locale_end_spaces = $13 ? $13 : '';
  979. my $c_default_block = $3 ? $3 : '';
  980. my $default_string = $6 ? $6 : '';
  981. my $short_string = $9 ? $9 : '';
  982. my $long_string = $12 ? $12 : '';
  983. print OUTPUT "$locale_start_spaces$c_default_block";
  984. $default_string =~ s/\s+/ /g;
  985. $default_string = entity_decode($default_string);
  986. $short_string =~ s/\s+/ /g;
  987. $short_string = entity_decode($short_string);
  988. $long_string =~ s/\s+/ /g;
  989. $long_string = entity_decode($long_string);
  990. for my $lang (sort keys %po_files_by_lang)
  991. {
  992. my $default_translation = $translations{$lang, $default_string};
  993. my $short_translation = $translations{$lang, $short_string};
  994. my $long_translation = $translations{$lang, $long_string};
  995. next if (!$default_translation && !$short_translation &&
  996. !$long_translation);
  997. print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
  998. print OUTPUT "$default_spaces";
  999. if ($default_translation)
  1000. {
  1001. $default_translation = entity_encode($default_translation);
  1002. print OUTPUT "<default>$default_translation</default>";
  1003. }
  1004. print OUTPUT "$short_spaces";
  1005. if ($short_translation)
  1006. {
  1007. $short_translation = entity_encode($short_translation);
  1008. print OUTPUT "<short>$short_translation</short>";
  1009. }
  1010. print OUTPUT "$long_spaces";
  1011. if ($long_translation)
  1012. {
  1013. $long_translation = entity_encode($long_translation);
  1014. print OUTPUT "<long>$long_translation</long>";
  1015. }
  1016. print OUTPUT "$locale_end_spaces</locale>";
  1017. }
  1018. }
  1019. print OUTPUT $source;
  1020. close OUTPUT;
  1021. }
  1022. sub rfc822deb_merge_translations
  1023. {
  1024. my %encodings = ();
  1025. for my $lang (keys %po_files_by_lang) {
  1026. $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
  1027. }
  1028. my $source;
  1029. $Text::Wrap::huge = 'overflow';
  1030. $Text::Wrap::break = qr/\n|\s(?=\S)/;
  1031. {
  1032. local $/; # slurp mode
  1033. open INPUT, "<$FILE" or die "can't open $FILE: $!";
  1034. $source = <INPUT>;
  1035. close INPUT;
  1036. }
  1037. open OUTPUT, ">${OUTFILE}" or die;
  1038. binmode (OUTPUT) if $^O eq 'MSWin32';
  1039. while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
  1040. {
  1041. my $sep = $1;
  1042. my $non_translated_line = $3.$4;
  1043. my $string = $5;
  1044. my $underscore = length($2);
  1045. next if $underscore eq 0 && $non_translated_line =~ /^#/;
  1046. # Remove [] dummy strings
  1047. my $stripped = $string;
  1048. $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
  1049. $stripped =~ s/\[\s[^\[\]]*\]$//;
  1050. $non_translated_line .= $stripped;
  1051. print OUTPUT $sep.$non_translated_line;
  1052. if ($underscore)
  1053. {
  1054. my @str_list = rfc822deb_split($underscore, $string);
  1055. for my $lang (sort keys %po_files_by_lang)
  1056. {
  1057. my $is_translated = 1;
  1058. my $str_translated = '';
  1059. my $first = 1;
  1060. for my $str (@str_list)
  1061. {
  1062. my $translation = $translations{$lang, $str};
  1063. if (!$translation)
  1064. {
  1065. $is_translated = 0;
  1066. last;
  1067. }
  1068. # $translation may also contain [] dummy
  1069. # strings, mostly to indicate an empty string
  1070. $translation =~ s/\[\s[^\[\]]*\]$//;
  1071. if ($first)
  1072. {
  1073. if ($underscore eq 2)
  1074. {
  1075. $str_translated .= $translation;
  1076. }
  1077. else
  1078. {
  1079. $str_translated .=
  1080. Text::Tabs::expand($translation) .
  1081. "\n";
  1082. }
  1083. }
  1084. else
  1085. {
  1086. if ($underscore eq 2)
  1087. {
  1088. $str_translated .= ', ' . $translation;
  1089. }
  1090. else
  1091. {
  1092. $str_translated .= Text::Tabs::expand(
  1093. Text::Wrap::wrap(' ', ' ', $translation)) .
  1094. "\n .\n";
  1095. }
  1096. }
  1097. $first = 0;
  1098. # To fix some problems with Text::Wrap::wrap
  1099. $str_translated =~ s/(\n )+\n/\n .\n/g;
  1100. }
  1101. next unless $is_translated;
  1102. $str_translated =~ s/\n \.\n$//;
  1103. $str_translated =~ s/\s+$//;
  1104. $_ = $non_translated_line;
  1105. s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
  1106. print OUTPUT;
  1107. }
  1108. }
  1109. }
  1110. print OUTPUT "\n";
  1111. close OUTPUT;
  1112. close INPUT;
  1113. }
  1114. sub rfc822deb_split
  1115. {
  1116. # Debian defines a special way to deal with rfc822-style files:
  1117. # when a value contain newlines, it consists of
  1118. # 1. a short form (first line)
  1119. # 2. a long description, all lines begin with a space,
  1120. # and paragraphs are separated by a single dot on a line
  1121. # This routine returns an array of all paragraphs, and reformat
  1122. # them.
  1123. # When first argument is 2, the string is a comma separated list of
  1124. # values.
  1125. my $type = shift;
  1126. my $text = shift;
  1127. $text =~ s/^[ \t]//mg;
  1128. return (split(/, */, $text, 0)) if $type ne 1;
  1129. return ($text) if $text !~ /\n/;
  1130. $text =~ s/([^\n]*)\n//;
  1131. my @list = ($1);
  1132. my $str = '';
  1133. for my $line (split (/\n/, $text))
  1134. {
  1135. chomp $line;
  1136. if ($line =~ /^\.\s*$/)
  1137. {
  1138. # New paragraph
  1139. $str =~ s/\s*$//;
  1140. push(@list, $str);
  1141. $str = '';
  1142. }
  1143. elsif ($line =~ /^\s/)
  1144. {
  1145. # Line which must not be reformatted
  1146. $str .= "\n" if length ($str) && $str !~ /\n$/;
  1147. $line =~ s/\s+$//;
  1148. $str .= $line."\n";
  1149. }
  1150. else
  1151. {
  1152. # Continuation line, remove newline
  1153. $str .= " " if length ($str) && $str !~ /\n$/;
  1154. $str .= $line;
  1155. }
  1156. }
  1157. $str =~ s/\s*$//;
  1158. push(@list, $str) if length ($str);
  1159. return @list;
  1160. }
  1161. sub quoted_translation
  1162. {
  1163. my ($lang, $string) = @_;
  1164. $string =~ s/\\\"/\"/g;
  1165. my $translation = $translations{$lang, $string};
  1166. $translation = $string if !$translation;
  1167. $translation =~ s/\"/\\\"/g;
  1168. return $translation
  1169. }
  1170. sub quoted_merge_translations
  1171. {
  1172. if (!$MULTIPLE_OUTPUT) {
  1173. print "Quoted only supports Multiple Output.\n";
  1174. exit(1);
  1175. }
  1176. for my $lang (sort keys %po_files_by_lang) {
  1177. if ( ! -d $lang ) {
  1178. mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n";
  1179. }
  1180. open INPUT, "<${FILE}" or die;
  1181. open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
  1182. binmode (OUTPUT) if $^O eq 'MSWin32';
  1183. while (<INPUT>)
  1184. {
  1185. s/\"(([^\"]|\\\")*[^\\\"])\"/"\"" . &quoted_translation($lang, $1) . "\""/ge;
  1186. print OUTPUT;
  1187. }
  1188. close OUTPUT;
  1189. close INPUT;
  1190. }
  1191. }