#! /usr/bin/env perl
# -*- mode: perl; -*-
#
# file diff_po.pl
# script to compare changes between translation files before merging them
#
# Examples of usage:
# ./diff_po.pl cs.po.old cs.po
# svn diff -r38367 --diff-cmd ./diff_po.pl cs.po
# git difftool --extcmd=./diff_po.pl sk.po
# ./diff_po.pl -rHEAD~100 cs.po   #fetch git revision and compare
# ./diff_po.pl -r39229 cs.po      #fetch svn revision and compare
# ./diff_po.pl -r-1 cs.po         #fetch the previous change of cs.po and compare
#
# This file is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this software; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# Copyright (c) 2010-2013 Kornel Benko, kornel@lyx.org
#
# TODO:
# 1.) Search for good correlations of deleted <==> inserted string
#     using Text::Levenshtein or Algorithm::Diff
#
# val:             '0' | '1' ;
#
# fuzzyopt:        '--display-fuzzy=' val ;
#
# untranslatedopt: '--display-untranslated=' val ;
#
# option:          fuzzyopt
#                  | untranslatedopt
#                  ;
# options:         | options option
#                  ;
#
# revspec:         revision-tag           # e.g. 46c00bab7
#                  | 'HEAD' relative-rev  # e.g. HEAD~3, HEAD-3
#                  | '-' number           # -1 == previous commit of the following po-file
#                  ;
#
# revision:        '-r' revspec ;
#
# filespecold:     revision | filespec ;
#
# filespec:                               # path to existing po-file
#
# filespecnew:     filespec ;
#
# files:           filespecold filespecnew ;
#
# diff:            'diff_po.pl' ' ' options files
#

BEGIN {
  # Make modules that live next to this script (parsePoLine) loadable.
  use File::Spec;
  my $p = File::Spec->rel2abs( __FILE__ );
  $p =~ s/[\/\\]?[^\/\\]+$//;
  unshift(@INC, "$p");
}

# Prototypes
sub get_env_name($ );
sub buildParentDir($$);
sub searchRepo($);
sub diff_po(@);
sub check_po_file_readable($$);
sub printDiff($$$$);
sub printIfDiff($$$);
sub printExtraMessages($$$);
sub getrev($$$);
#########

use strict;
use parsePoLine;
use Term::ANSIColor qw(:constants);
use File::Temp;
use Cwd qw(abs_path getcwd);

my %Messages = ();              # Used for original po-file
my %newMessages = ();           # new po-file
my %Untranslated = ();          # inside new po-file
my %Fuzzy = ();                 # inside new po-file
my $result = 0;                 # exit value (bit set, see diff_po)
my $printlines = 1;             # print line numbers of the entries
my @names = ();                 # display names of the two compared files
my %options = (
  "--display-fuzzy" => 1,
  "--display-untranslated" => 1,
    );

# Absolute path of this script.  It is handed to svn/hg as the external
# diff command after we chdir() to the top of the repository, so a relative
# $0 would not resolve anymore at that point.
my $script_path = File::Spec->rel2abs($0);

# Map an option name to the environment variable carrying its value,
# e.g. "--display-fuzzy" --> "DIFF_PO_DISPLAY_FUZZY".
# Returns undef if the given string does not start with "--".
sub get_env_name($)
{
  my ($e) = @_;
  return undef if ($e !~ s/^\-\-//);
  $e = uc($e);
  $e =~ s/\-/_/g;
  return "DIFF_PO_" . $e;
}

# Set option-defaults from environment
# git: not needed, diff is not recursive here
# svn: needed to pass options through --diff-cmd parameter
# hg:  needed to pass options through extdiff parameter
for my $optname (keys %options) {
  my $e = get_env_name($optname);
  if (defined($e) && defined($ENV{$e})) {
    $options{$optname} = $ENV{$e};
  }
}

# Check for options: leading arguments of the form "<name>=<digits>".
# Accepted values are also exported to the environment, so that they
# reach the instance of this script started indirectly by svn or hg.
while (defined($ARGV[0])) {
  my $opt = $ARGV[0];
  last if ($opt !~ s/=(\d+)$//);
  my $val = $1;
  if (defined($options{$opt})) {
    $options{$opt} = $val;
    my $e = get_env_name($opt);
    if (defined($e)) {
      $ENV{$e} = $val;
    }
    shift(@ARGV);
  }
  else {
    die("illegal option \"$opt\"\n");
  }
}

# Check first, if called as standalone program for git
if (defined($ARGV[0]) && $ARGV[0] =~ /^-r(.*)/) {
  my $rev = $1;
  shift(@ARGV);
  if ($rev eq "") {
    # revision given as separate argument ("-r" "HEAD~3")
    $rev = shift(@ARGV);
    die("missing revision after -r\n") if (! defined($rev));
  }
  # convert arguments to full path ...
  # (the loop variable aliases the elements of @ARGV)
  for my $argf1 (@ARGV) {
    my $abs = abs_path($argf1);
    die("cannot resolve path of \"$argf1\"\n") if (! defined($abs));
    $argf1 = $abs;
  }
  for my $argf (@ARGV) {
    my $baseargf;
    my $filedir;
    if ($argf =~ /^(.*)\/([^\/]+)$/) {
      $baseargf = $2;
      $filedir = $1;
      # set working directory for the repo-command
      chdir($filedir) or die("cannot change to directory \"$filedir\": $!\n");
    }
    else {
      $baseargf = $argf;
      $filedir = ".";
    }
    $filedir = getcwd();
    my ($repo, $level) = searchRepo($filedir);
    my $relargf = $baseargf;    # argf relative to the top-most repo directory
    my $topdir;
    if (defined($level)) {
      $topdir = $filedir;       # start at the directory of the po-file
      # Strip one path component per level, prepending it to the relative name
      while ($level > 0) {
        $topdir =~ s/\/([^\/]+)$//;
        $relargf = "$1/$relargf";
        $level--;
      }
      chdir($topdir) or die("cannot change to directory \"$topdir\": $!\n");
    }
    else {
      print "Could not find the repo-type\n";
      exit(-1);
    }
    #check po-file
    check_po_file_readable($baseargf, $relargf);
    # Resolve the revision separately for every file: a relative spec like
    # "-1" means "previous change of *this* po-file", so the result for one
    # file must not be reused for the next one.
    my $filerev = getrev($repo, $rev, $argf);
    if ($repo eq ".git") {
      my @args = ();
      my $tmpfile = File::Temp->new();
      push(@args, "-L", $argf . "    (" . $filerev . ")");
      push(@args, "-L", $argf . "    (local copy)");
      print "git show $filerev:$relargf\n";
      # List form of open: no shell is involved, so special characters in
      # the file name cannot be misinterpreted.
      open(my $show, '-|', 'git', 'show', $filerev . ":" . $relargf)
        or die("cannot run \"git show\": $!\n");
      $tmpfile->unlink_on_destroy( 1 );
      while (my $line = <$show>) {
        print {$tmpfile} $line;
      }
      close($show);
      $tmpfile->seek( 0, SEEK_END );            # Flush()
      push(@args, $tmpfile->filename, $argf);
      print "===================================================================\n";
      diff_po(@args);
    }
    elsif ($repo eq ".svn") {
      # program svnversion needed here
      # call it again indirectly
      my @cmd = ("svn", "diff", "-r$filerev", "--diff-cmd", $script_path, $relargf);
      print "cmd = " . join(' ', @cmd) . "\n";
      if (system(@cmd) == -1) {
        warn("cannot run \"svn\": $!\n");
      }
    }
    elsif ($repo eq ".hg") {
      # for this to work, one has to edit ~/.hgrc
      # Insert there
      #     [extensions]
      #     hgext.extdiff =
      #
      my @cmd = ("hg", "extdiff", "-r", "$filerev", "-p", $script_path, $relargf);
      print "cmd = " . join(' ', @cmd) . "\n";
      if (system(@cmd) == -1) {
        warn("cannot run \"hg\": $!\n");
      }
    }
  }
}
else {
  diff_po(@ARGV);
}

exit($result);
#########################################################

# This routine builds n-th parent-path
# E.g. buildParentDir("abc", 1) --> "abc/.."
#      buildParentDir("abc", 4) --> "abc/../../../.."
# A count <= 0 returns the directory unchanged.
sub buildParentDir($$)
{
  my ($dir, $par) = @_;
  return $dir if ($par <= 0);
  return $dir . ("/.." x $par);
}

# Tries up to 10 parent levels to find the repo-type
# Returns the list (repo-type, level), where repo-type is one of
# ".git", ".svn", ".hg" and level is the number of parent directories
# between $dir and the directory containing the repo-directory.
# If nothing is found, returns ("") so that the level is undefined.
sub searchRepo($)
{
  my ($dir) = @_;
  for my $parent ( 0 .. 10 ) {
    my $f = buildParentDir($dir, $parent);
    for my $s (".git", ".svn", ".hg") {
      if (-d "$f/$s") {
        return ($s, $parent);
      }
    }
  }
  return("");	# not found
}

# Compare two po-files and print the differences.
# Arguments: optional "-L <name>" pairs (display names for the old and the
# new file, as passed by svn/git/hg), other options starting with "-"
# (ignored), followed by exactly two file names: original and new po-file.
# Sets bits in the global $result:
#   4  a message present in both files differs
#   8  a message was deleted
#  16  a message was added
sub diff_po(@)
{
  my @args = @_;
  %Messages = ();
  %newMessages = ();
  %Untranslated = ();
  %Fuzzy = ();
  @names = ();
  while (defined($args[0])) {
    last if ($args[0] !~ /^\-/);
    my $param = shift(@args);
    if ($param eq "-L") {
      my $name = shift(@args);
      push(@names, $name);
    }
    else {
      # ignore other options
    }
  }
  if (! defined($names[0])) {
    push(@names, "original");
  }
  if (! defined($names[1])) {
    push(@names, "new");
  }

  if (@args != 2) {
    die("names = \"" . join('" "', @names) . "\"... args = \"" . join('" "', @args) . "\" Expected exactly 2 parameters");
  }

  check_po_file_readable($names[0], $args[0]);
  check_po_file_readable($names[1], $args[1]);

  parse_po_file($args[0], %Messages);
  parse_po_file($args[1], %newMessages);

  my @MsgKeys = getLineSortedKeys(%newMessages);

  print RED "<<< \"$names[0]\"\n", RESET;
  print GREEN ">>> \"$names[1]\"\n", RESET;
  for my $k (@MsgKeys) {
    if ($newMessages{$k}->{msgstr} eq "") {
      # this is still untranslated string
      $Untranslated{$newMessages{$k}->{line}} = $k;
    }
    elsif ($newMessages{$k}->{fuzzy}) {
      #fuzzy string
      # mark only, if not in alternative area
      if (! $newMessages{$k}->{alternative}) {
        $Fuzzy{$newMessages{$k}->{line}} = $k;
      }
    }
    if (exists($Messages{$k})) {
      # Present in both files: report if changed, then drop it from both
      # tables so that only deleted resp. new messages remain.
      printIfDiff($k, $Messages{$k}, $newMessages{$k});
      delete($Messages{$k});
      delete($newMessages{$k});
    }
  }

  # What is left in %Messages exists only in the original file
  @MsgKeys = getLineSortedKeys(%Messages);
  for my $k (@MsgKeys) {
    $result |= 8;
    print "deleted message\n";
    print "< line = " . $Messages{$k}->{line} . "\n" if ($printlines);
    print RED "< fuzzy = " . $Messages{$k}->{fuzzy} . "\n", RESET;
    print RED "< msgid = \"$k\"\n", RESET;
    print RED "< msgstr = \"" . $Messages{$k}->{msgstr} . "\"\n", RESET;
  }

  # What is left in %newMessages exists only in the new file
  @MsgKeys = getLineSortedKeys(%newMessages);
  for my $k (@MsgKeys) {
    $result |= 16;
    print "new message\n";
    print "> line = " . $newMessages{$k}->{line} . "\n" if ($printlines);
    print GREEN "> fuzzy = " . $newMessages{$k}->{fuzzy} . "\n", RESET;
    print GREEN "> msgid = \"$k\"\n", RESET;
    print GREEN "> msgstr = \"" . $newMessages{$k}->{msgstr} . "\"\n", RESET;
  }

  if ($options{"--display-fuzzy"}) {
    printExtraMessages("fuzzy", \%Fuzzy, \@names);
  }
  if ($options{"--display-untranslated"}) {
    printExtraMessages("untranslated", \%Untranslated, \@names);
  }
}

# Die unless $filename is an existing, regular and readable file.
# $spec is the name used for the file in the error message.
sub check_po_file_readable($$)
{
  my ($spec, $filename) = @_;

  if (! -e $filename ) {
    die("$spec po file does not exist");
  }
  if ( ! -f $filename ) {
    die("$spec po file is not regular");
  }
  if ( ! -r $filename ) {
    die("$spec po file is not readable");
  }
}

# Diff of one corresponding entry
# $k, $nk:    msgid in the original resp. new file
# $rM, $rnM:  references to the entries (line, fuzzy, msgstr) of the
#             original resp. new file
sub printDiff($$$$)
{
  my ($k, $nk, $rM, $rnM) = @_;
  print "diffline = " . $rM->{line} . "," . $rnM->{line} . "\n" if ($printlines);
  print "  msgid = \"$k\"\n";
  if ($rM->{fuzzy} eq $rnM->{fuzzy}) {
    print "  fuzzy = \"" . $rM->{fuzzy} . "\"\n" if ($printlines);
  }
  else {
    print RED "< fuzzy = \"" . $rM->{fuzzy} . "\"\n", RESET;
  }
  print RED "< msgstr = \"" . $rM->{msgstr} . "\"\n", RESET;
  if ($k ne $nk) {
    print GREEN "> msgid = \"$nk\"\n", RESET;
  }
  if ($rM->{fuzzy} ne $rnM->{fuzzy}) {
    print GREEN "> fuzzy = \"" . $rnM->{fuzzy} . "\"\n", RESET;
  }
  print GREEN "> msgstr = \"" . $rnM->{msgstr} . "\"\n", RESET;
  print "\n";
}

# Print the diff of an entry present in both files, but only if its
# fuzzy-flag or its translation changed.  Sets bit 4 in $result then.
sub printIfDiff($$$)
{
  my ($k, $rM, $rnM) = @_;
  my $doprint = 0;
  # NOTE(review): numeric comparison, presumably the fuzzy flag is 0/1
  # as delivered by parsePoLine -- confirm.
  $doprint = 1 if ($rM->{fuzzy} != $rnM->{fuzzy});
  $doprint = 1 if ($rM->{msgstr} ne $rnM->{msgstr});
  if ($doprint) {
    $result |= 4;
    printDiff($k, $k, $rM, $rnM);
  }
}

# Print the list of messages of the given type ("fuzzy", "untranslated")
# found in the new file.
# $rExtra: reference to hash (line number --> msgid)
# $rNames: reference to the array of display names (original, new)
sub printExtraMessages($$$)
{
  my ($type, $rExtra, $rNames) = @_;
  my @sortedExtraKeys = sort { $a <=> $b;} keys %{$rExtra};

  if (@sortedExtraKeys > 0) {
    # scalar() is required here: '.' and '+' have the same precedence, so
    # '"Still " . 0 + @keys' would be evaluated as '("Still " . 0) + @keys'
    # and the prefix would get lost.
    print "Still " . scalar(@sortedExtraKeys) . " $type messages found in $rNames->[1]\n";
    for my $l (@sortedExtraKeys) {
      print "> line $l: \"" . $rExtra->{$l} . "\"\n";
    }
  }
}

#
# get repository dependent revision representation
# $repo: repo-type (".git", ".svn" or ".hg")
# $rev:  revision as given by the user
# $argf: po-file
# Revisions of the form "HEAD", "HEAD~n", "HEAD-n" or "-n" are converted to
# the revision of the n-th previous change of the po-file, as reported by
# the log-command of the repository.  Anything else is returned unchanged.
sub getrev($$$)
{
  my ($repo, $rev, $argf) = @_;
  my $revnum;

  if ($rev eq "HEAD") {
    $revnum = 0;
  }
  else {
    return $rev if ($rev !~ /^(-|HEAD[-~])(\d+)$/);
    $revnum = $2;
  }

  # The log-commands are started with the list form of open, so no shell
  # is involved and the file name needs no quoting.
  if ($repo eq ".hg") {
    # try to get the revision of n-th previous change of the po-file
    if (open(my $log, '-|', 'hg', 'log', $argf)) {
      my $count = $revnum;
      my $res = "-$revnum";
      while (my $l = <$log>) {
        chomp($l);
        if ($l =~ /:\s+(\d+):([^\s]+)$/) {
          $res = $2;
          last if ($count-- <= 0);
        }
      }
      close($log);
      return($res);
    }
    else {
      return "-$revnum";
    }
  }
  elsif ($repo eq ".git") {
    # try to get the revision of n-th previous change of the po-file
    if (open(my $log, '-|', 'git', 'log', "--skip=$revnum", '-1', '--', $argf)) {
      my $res = "HEAD~$revnum";
      while (my $l = <$log>) {
        chomp($l);
        if ($l =~ /^commit\s+([^\s]+)$/) {
          $res = $1;
          last;
        }
      }
      close($log);
      return($res);
    }
    else {
      return("HEAD~$revnum");
    }
  }
  elsif ($repo eq ".svn") {
    if (open(my $log, '-|', 'svn', 'log', $argf)) {
      my $count = $revnum;
      my $res = $rev;
      while (my $l = <$log>) {
        chomp($l);
        if ($l =~ /^r(\d+)\s+\|/) {
          $res = $1;
          last if ($count-- <= 0);
        }
      }
      close($log);
      return $res;
    }
    else {
      # Fallback: subtract the offset from the revision of the working copy
      if (open(my $ver, '-|', 'svnversion')) {
        while (my $r1 = <$ver>) {
          chomp($r1);
          if ($r1 =~ /^((\d+):)?(\d+)M?$/) {
            $rev = $3-$revnum;
          }
        }
        close($ver);
      }
      return $rev;
    }
  }
  return $rev;
}