mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-24 13:48:59 +00:00
292 lines
8.2 KiB
Perl
Executable File
292 lines
8.2 KiB
Perl
Executable File
#! /usr/bin/perl -w
|
|
# -*- mode: perl; -*-
|
|
|
|
# file pocheck.pl
|
|
#
|
|
# This file is part of LyX, the document processor.
|
|
# Licence details can be found in the file COPYING.
|
|
#
|
|
# author: Michael Gerz, michael.gerz@teststep.org
|
|
#
|
|
|
|
use strict;
|
|
use warnings;
|
|
use Getopt::Std;
|
|
use Encode qw(encode decode);
|
|
|
|
sub mylc($);
|
|
sub replaceSynopsis($);
|
|
|
|
my $usage = <<EOT;
|
|
pocheck.pl [-acmpqst] po_file [po_file] ...
|
|
|
|
This script performs some consistency checks on po files.
|
|
|
|
We check for everything listed here, unless one or more of these
|
|
options is given, in which case we checks only for those requested.
|
|
-a: Check arguments, like %1\$s
|
|
-c: Check for colons at end
|
|
-m: Check for menu shortcuts
|
|
-p: Check for period at end
|
|
-q: Check Qt shortcuts
|
|
-s: Check for space at end
|
|
-t: Check for uniform translation
|
|
These options can be given with or without other options.
|
|
-f: Ignore fuzzy translations
|
|
-w: Only report summary total of errors
|
|
-i: Silent mode, report only errors
|
|
EOT
|
|
|
|
my %options;
|
|
getopts(":hacfmpqstwi", \%options);
|
|
|
|
if (defined($options{h})) {
|
|
print $usage;
|
|
exit 0;
|
|
}
|
|
|
|
my $only_total = defined($options{w});
|
|
delete $options{w} if $only_total;
|
|
my $ignore_fuzzy = defined($options{f});
|
|
delete $options{f} if $ignore_fuzzy;
|
|
my $silent_mode = defined($options{i});
|
|
delete $options{i} if $silent_mode;
|
|
|
|
my $check_args = (!%options or defined($options{a}));
|
|
my $check_colons = (!%options or defined($options{c}));
|
|
my $check_spaces = (!%options or defined($options{s}));
|
|
my $check_periods = (!%options or defined($options{p}));
|
|
my $check_qt = (!%options or defined($options{q}));
|
|
my $check_menu = (!%options or defined($options{m}));
|
|
my $check_trans = (!%options or defined($options{t}));
|
|
|
|
my %trans;
|
|
|
|
my $total_warn = 0;
|
|
|
|
foreach my $pofilename ( @ARGV ) {
|
|
my %bad;
|
|
if (!$silent_mode) {
|
|
print "Processing po file '$pofilename'...\n";
|
|
}
|
|
|
|
open( INPUT, "<$pofilename" )
|
|
|| die "Cannot read po file '$pofilename'";
|
|
my @pofile = <INPUT>;
|
|
close( INPUT );
|
|
|
|
undef( %trans );
|
|
keys( %trans ) = 10000;
|
|
|
|
my $noOfLines = $#pofile;
|
|
|
|
my $warn = 0;
|
|
|
|
my $i = 0;
|
|
my ($msgid, $msgid_trans, $msgstr, $more);
|
|
|
|
while ($i <= $noOfLines) {
|
|
my $linenum = $i;
|
|
( $msgid ) = ( $pofile[$i] =~ m/^msgid "(.*)"/ );
|
|
$i++;
|
|
next unless $msgid;
|
|
if ($ignore_fuzzy) {
|
|
my $previous = $pofile[$i - 2];
|
|
next if $previous =~ m/#,.*fuzzy/;
|
|
}
|
|
|
|
# some msgid's are more than one line long, so add those.
|
|
while ( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) {
|
|
$msgid = $msgid . $more;
|
|
$i++;
|
|
}
|
|
|
|
# now look for the associated msgstr.
|
|
until ( ( $msgstr ) = ( $pofile[$i] =~ m/^msgstr "(.*)"/ ) ) { $i++; };
|
|
$i++;
|
|
# again collect any extra lines.
|
|
while ( ( $i <= $noOfLines ) &&
|
|
( ( $more ) = $pofile[$i] =~ m/^"(.*)"/ ) ) {
|
|
$msgstr = $msgstr . $more;
|
|
$i++;
|
|
}
|
|
|
|
# nothing to do if one of them is empty.
|
|
# (surely that is always $msgstr?)
|
|
next if ($msgid eq "" or $msgstr eq "");
|
|
|
|
# discard [[...]] from the end of msgid, this is used only as hint to translation
|
|
$msgid_trans = $msgid; # used for uniform translation
|
|
$msgid =~ s/\[\[.*\]\]$//;
|
|
|
|
# Check for matching %1$s, etc.
|
|
if ($check_args) {
|
|
my @argstrs = ( $msgid =~ m/%(\d)\$s/g );
|
|
if (@argstrs) {
|
|
my $n = 0;
|
|
foreach my $arg (@argstrs) { $n = $arg if $arg > $n; }
|
|
if ($n <= 0) {
|
|
print "$pofilename, line $linenum: Problem finding arguments in:\n $msgid!\n"
|
|
unless $only_total;
|
|
++$bad{"Missing arguments"};
|
|
$warn++;
|
|
} else {
|
|
foreach my $i (1..$n) {
|
|
my $arg = "%$i\\\$s";
|
|
if ( $msgstr !~ m/$arg/ ) {
|
|
print "$pofilename, line $linenum: Missing argument `$arg'\n '$msgid' ==> '$msgstr'\n"
|
|
unless $only_total;
|
|
++$bad{"Missing arguments"};
|
|
$warn++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if ($check_colons) {
|
|
# Check colon at the end of a message
|
|
if ( ( $msgid =~ m/: *(\|.*)?$/ ) != ( $msgstr =~ m/: *(\|.*)?$/ ) ) {
|
|
print "Line $linenum: Missing or unexpected colon:\n '$msgid' => '$msgstr'\n"
|
|
unless $only_total;
|
|
++$bad{"Bad colons"};
|
|
$warn++;
|
|
}
|
|
}
|
|
|
|
if ($check_periods) {
|
|
# Check period at the end of a message; uncomment code if you are paranoid
|
|
# Convert '...' to '…' first
|
|
$msgid = replaceSynopsis($msgid);
|
|
$msgstr = replaceSynopsis($msgstr);
|
|
if ( ( $msgid =~ m/\. *(\|.*)?$/ ) != ( $msgstr =~ m/\. *(\|.*)?$/ ) ) {
|
|
print "Line $linenum: Missing or unexpected period:\n '$msgid' => '$msgstr'\n"
|
|
unless $only_total;
|
|
++$bad{"Bad periods"};
|
|
$warn++;
|
|
}
|
|
}
|
|
|
|
if ($check_spaces) {
|
|
# Check space at the end of a message (if not a shortcut)
|
|
my ($msgid1, $msgstr1) = ($msgid, $msgstr);
|
|
$msgid1 =~ s/\|.$//;
|
|
if ($msgstr =~ /^(.*)\|(.+)$/) {
|
|
my ($msg, $shortcut) = ($1, $2);
|
|
# Check for unicode char
|
|
my $u = decode('utf-8', $shortcut);
|
|
if (length($u) == 1) {
|
|
$msgstr1 = $msg;
|
|
}
|
|
}
|
|
if (($msgid1 =~ / $/) != ($msgstr1 =~ / $/)) {
|
|
print "Line $linenum: Missing or unexpected space:\n '$msgid' => '$msgstr'\n"
|
|
unless $only_total;
|
|
++$bad{"Bad spaces"};
|
|
$warn++;
|
|
}
|
|
}
|
|
|
|
if ($check_qt) {
|
|
# Check for "&" shortcuts
|
|
if ( ( $msgid =~ m/&[^ &]/ ) != ( $msgstr =~ m/&[^ &]/ ) ) {
|
|
print "Line $linenum: Missing or unexpected Qt shortcut:\n '$msgid' => '$msgstr'\n"
|
|
unless $only_total;
|
|
++$bad{"Bad Qt shortcuts"};
|
|
$warn++;
|
|
}
|
|
}
|
|
|
|
if ($check_menu) {
|
|
# Check for "|..." shortcuts (space shortcut allowed)
|
|
# Shortcut is either 1 char (ascii in msgid) or utf8 char (in msgstr)
|
|
my ($s1, $s2) = (0,0);
|
|
$s1 = 1 if ($msgid =~ /\|(.)$/);
|
|
if ($msgstr =~ /.*\|(.+)$/) {
|
|
my $chars = $1;
|
|
my $u = decode('utf-8', $chars);
|
|
$s2 = 1 if (length($u) == 1);
|
|
}
|
|
if($s1 != $s2) {
|
|
print "Line $linenum: Missing or unexpected menu shortcut:\n '$msgid' => '$msgstr'\n"
|
|
unless $only_total;
|
|
++$bad{"Bad menu shortcuts"};
|
|
$warn++;
|
|
}
|
|
}
|
|
|
|
next unless $check_trans;
|
|
|
|
# we now collect these translations in a hash.
|
|
# this will allow us to check below if we have translated
|
|
# anything more than one way.
|
|
my $msgid_clean = lc($msgid_trans);
|
|
my $msgstr_clean = mylc($msgstr);
|
|
|
|
$msgid_clean =~ s/(.*)\|.*?$/$1/; # strip menu shortcuts
|
|
$msgstr_clean =~ s/(.*)\|.*?$/$1/;
|
|
$msgid_clean =~ s/&([^ ])/$1/; # strip Qt shortcuts
|
|
$msgstr_clean =~ s/&([^ ])/$1/;
|
|
|
|
# this is a hash of hashes. the keys of the outer hash are
|
|
# cleaned versions of ORIGINAL strings. the keys of the inner hash
|
|
# are the cleaned versions of their TRANSLATIONS. The value for the
|
|
# inner hash is an array of the orignal string and translation.
|
|
$trans{$msgid_clean}{$msgstr_clean} = [ $msgid_trans, $msgstr, $linenum ];
|
|
}
|
|
|
|
if ($check_trans) {
|
|
foreach $msgid ( keys %trans ) {
|
|
# so $ref is a reference to the inner hash.
|
|
my $ref = $trans{$msgid};
|
|
# @msgstrkeys is an array of the keys of that inner hash.
|
|
my @msgstrkeys = keys %$ref;
|
|
|
|
# do we have more than one such key?
|
|
if ( $#msgstrkeys > 0 ) {
|
|
if (!$only_total) {
|
|
print "Different translations for '$msgid':\n";
|
|
foreach $msgstr ( @msgstrkeys ) {
|
|
print "Line $ref->{$msgstr}[2]: '" .
|
|
$ref->{$msgstr}[0] . "' => '" .
|
|
$ref->{$msgstr}[1] . "'\n";
|
|
}
|
|
}
|
|
++$bad{"Inconsistent translations"};
|
|
$warn++;
|
|
}
|
|
}
|
|
}
|
|
if (!$silent_mode) {
|
|
if ($warn) {
|
|
while (my ($k, $v) = each %bad) { print "$k: $v\n"; }
|
|
if (scalar(keys %bad) > 1) {
|
|
print "Total warnings: $warn\n";
|
|
}
|
|
} else {
|
|
print "No warnings!\n";
|
|
}
|
|
print "\n";
|
|
}
|
|
$total_warn += $warn;
|
|
}
|
|
|
|
exit ($total_warn > 0);
|
|
|
|
# Use lowercase also for non-ascii chars
|
|
sub mylc($)
|
|
{
|
|
my ($msg) = @_;
|
|
return(encode('utf-8',lc(decode('utf-8', $msg))));
|
|
}
|
|
|
|
sub replaceSynopsis($)
|
|
{
|
|
my ($string) = @_;
|
|
|
|
return ($string) if ($string !~ /^(.*)\.\.\.(.*)$/);
|
|
my ($before, $after) = ($1, $2);
|
|
return $string if (($before =~ /\.$/) || ($after =~ /^\./));
|
|
return("$before…$after");
|
|
}
|