#! /usr/bin/perl -w

# file pocheck.pl
#
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.
#
# author: Michael Gerz, michael.gerz@teststep.org
#
# This script performs some consistency checks on po files:
#
# 1. Uniform translation of messages that are identical except
#    for capitalization, shortcuts, and shortcut notation.
# 2. Usage of the following elements in both the original and
#    the translated message (or no usage at all):
#    shortcuts ("&" and "|..."), trailing space, trailing colon
#
# Invocation:
#    pocheck.pl po_file po_file ...

use strict;
use warnings;

# parse_po_entries( \@lines )
#
# Splits the lines of a po file into its messages.
#
#   \@lines  reference to the raw lines of the file
#
# Returns a list of [ $msgid, $msgstr ] pairs in file order.  Strings that
# are spread over several quoted lines are concatenated.  Entries for which
# no plain 'msgstr "..."' line is found before the next msgid (plural forms,
# or a truncated file) are dropped.
sub parse_po_entries {
    my ($lines) = @_;
    my @entries;
    my $last = $#{$lines};
    my $i    = 0;

    while ( $i <= $last ) {
        my ($msgid) = ( $lines->[$i] =~ m/^msgid "(.*)"/ );
        $i++;

        # Test for definedness, not truth: multi-line messages start with
        # an empty 'msgid ""' line and must not be skipped here.
        next unless defined $msgid;

        # some msgid's are more than one line long, so add those.
        while ( $i <= $last && $lines->[$i] =~ m/^"(.*)"/ ) {
            $msgid .= $1;
            $i++;
        }

        # now look for the associated msgstr.  Stop at the end of the file
        # or at the next msgid, so that an entry without a plain msgstr
        # can neither loop forever nor steal the following translation.
        my $msgstr;
        while ( $i <= $last ) {
            last if $lines->[$i] =~ m/^msgid "/;
            if ( $lines->[$i] =~ m/^msgstr "(.*)"/ ) {
                $msgstr = $1;
                $i++;
                last;
            }
            $i++;
        }
        next unless defined $msgstr;

        # again collect any extra lines.
        while ( $i <= $last && $lines->[$i] =~ m/^"(.*)"/ ) {
            $msgstr .= $1;
            $i++;
        }

        push @entries, [ $msgid, $msgstr ];
    }

    return @entries;
}

# check_message( $msgid, $msgstr )
#
# Compares one original message with its translation.
#
# Returns a list of ready-to-print warning texts (empty if nothing is
# wrong), in this order: positional arguments, trailing colon, trailing
# space, Qt shortcut, menu shortcut.
sub check_message {
    my ( $msgid, $msgstr ) = @_;
    my @problems;

    # Check for matching %1$s, etc.
    my @argstrs = ( $msgid =~ m/%(\d)\$s/g );
    if (@argstrs) {
        my $highest = 0;
        foreach my $arg (@argstrs) { $highest = $arg if $arg > $highest; }
        if ( $highest <= 0 ) {
            push @problems, "Problem finding arguments in:\n ${msgid}!\n";
        }
        else {
            foreach my $num ( 1 .. $highest ) {
                my $placeholder = '%' . $num . '$s';
                next if $msgstr =~ m/\Q$placeholder\E/;
                push @problems,
                    "Missing argument `${placeholder}'\n '${msgid}' ==> '${msgstr}'\n";
            }
        }
    }

    # Elements that must appear in both strings or in neither.  A trailing
    # "|..." menu shortcut is ignored when looking at the end of a message.
    my @paired_checks = (
        [ 'colon', qr/: *(?:\|.*)?$/ ],

        # Check period at the end of a message; uncomment if you are paranoid
        # [ 'period', qr/\. *(?:\|.*)?$/ ],

        # At least one space is required: a pattern that also accepts zero
        # spaces matches every string and the check could never fire.
        [ 'space',         qr/ +(?:\|.*)?$/ ],
        [ 'Qt shortcut',   qr/&[^ ]/ ],
        [ 'menu shortcut', qr/\|[^ ]/ ],
    );
    foreach my $check (@paired_checks) {
        my ( $what, $pattern ) = @{$check};
        if ( ( $msgid =~ $pattern ) xor ( $msgstr =~ $pattern ) ) {
            push @problems,
                "Missing or unexpected $what:\n '${msgid}' => '${msgstr}'\n";
        }
    }

    return @problems;
}

# clean_message( $text )
#
# Returns $text in lower case with a trailing "|..." menu shortcut and the
# first "&" Qt shortcut marker removed, so that messages differing only in
# capitalization or shortcuts compare equal.
sub clean_message {
    my ($text) = @_;
    my $clean = lc($text);
    $clean =~ s/(.*)\|.*?$/$1/;    # strip menu shortcuts
    $clean =~ s/&([^ ])/$1/;       # strip Qt shortcuts
    return $clean;
}

# this is a hash of hashes. the keys of the outer hash are
# cleaned versions of ORIGINAL strings. the keys of the inner hash
# are the cleaned versions of their TRANSLATIONS. The value for the
# inner hash is an array of the original string and translation.
# It is reset for every po file.
my %trans;

foreach my $pofilename ( @ARGV )
{
    print "Processing po file '$pofilename'...\n";

    open( my $input, '<', $pofilename )
        or die "Cannot read po file '$pofilename': $!";
    my @pofile = <$input>;
    close( $input );

    undef( %trans );
    keys( %trans ) = 10000;

    my $warn = 0;

    foreach my $entry ( parse_po_entries( \@pofile ) ) {
        my ( $msgid, $msgstr ) = @{$entry};

        # nothing to do if one of them is empty (file header,
        # untranslated message).
        next if ( $msgid eq "" or $msgstr eq "" );

        foreach my $problem ( check_message( $msgid, $msgstr ) ) {
            print($problem);
            $warn++;
        }

        # we now collect these translations in a hash.
        # this will allow us to check below if we have translated
        # anything more than one way.
        $trans{ clean_message($msgid) }{ clean_message($msgstr) } =
            [ $msgid, $msgstr ];
    }

    # Sorted so that the report is the same on every run.
    foreach my $msgid_clean ( sort keys %trans ) {
        # $variants is a reference to the inner hash.
        my $variants   = $trans{$msgid_clean};
        my @msgstrkeys = sort keys %{$variants};

        # do we have more than one such key?
        next unless @msgstrkeys > 1;

        print( "Different translations for '$msgid_clean':\n" );
        foreach my $msgstr_clean (@msgstrkeys) {
            my ( $original, $translation ) = @{ $variants->{$msgstr_clean} };
            print( " '" . $original . "' => '" . $translation . "'\n" );
        }
        $warn++;
    }

    print( "\nTotal number of warnings: $warn\n\n" );
}