CMake build tests: optimized URL checks

The original test took about 50 minutes. With the checks now running in parallel, it takes about 150 seconds.
2024-11-22 10:00:33 +00:00 · 2024-10-06 18:04:00 +02:00 · 2024-10-06 18:04:00 +02:00 · 3f48486a5c
commit 3f48486a5c
parent a16e4b206a
4 changed files with 310 additions and 203 deletions
--- a/development/checkurls/CheckURL.pm
+++ b/development/checkurls/CheckURL.pm
@ -1,5 +1,6 @@
 # -*- mode: perl; -*-
 package CheckURL;
 # file CheckURL.pm
 #
 # This file is part of LyX, the document processor.
@ -14,6 +15,7 @@ package CheckURL;
 use strict;
 our (@EXPORT, @ISA);
 BEGIN {
  use Exporter ();
  @ISA    = qw(Exporter);
@ -25,16 +27,18 @@ sub check_http_url($$$$);
 sub check_ftp_dir_entry($$);
 sub check_ftp_url($$$$);
 sub check_unknown_url($$$$);
-sub check_url($$);
+sub check_url($$$$);
 ################
-sub check_http_url($$$$)
+my $fe;
-{
+my $fs;
 sub check_http_url($$$$) {
  require LWP::UserAgent;
  my ($protocol, $host, $path, $file) = @_;
-  my $ua = LWP::UserAgent->new;
+  my $ua   = LWP::UserAgent->new(timeout => 20);
  my $getp = "/";
  if ($path ne "") {
    $getp .= $path;
@ -54,7 +58,7 @@ sub check_http_url($$$$)
    $buf = $response->decoded_content;
  }
  else {
-    print " " . $response->status_line . ": ";
+    print $fe " " . $response->status_line . ": ";
    return 3;
  }
  my @title = ();
@ -66,9 +70,9 @@ sub check_http_url($$$$)
    $title =~ s/^ //;
    $title =~ s/ $//;
    push(@title, $title);
-    print "title = \"$title\": ";
+    print $fe "title = \"$title\": ";
    if ($title =~ /Error 404|Not Found/) {
-      print " Page reports 'Not Found' from \"$protocol://$host$getp\": ";
+      print $fe " Page reports 'Not Found' from \"$protocol://$host$getp\": ";
      $res = 3;
    }
  }
@ -79,13 +83,12 @@ sub check_http_url($$$$)
 # returns 0, x if file does not match entry
 #         1, x everything OK
 #         2, x if not accesible (permission)
-sub check_ftp_dir_entry($$)
+sub check_ftp_dir_entry($$) {
 {
  my ($file, $e) = @_;
  my $other = '---';
  my $isdir = 0;
-  #print "Checking '$file' against '$e'\n";
+  #print $fe "Checking '$file' against '$e'\n";
  $file =~ s/^\///;
  $isdir = 1 if ($e =~ /^d/);
  return (0, $isdir) if ($e !~ /\s$file$/);
@ -93,25 +96,26 @@ sub check_ftp_dir_entry($$)
    $other = $1;
  }
  else {
-    #print "Invalid entry\n";
+    #print $fe "Invalid entry\n";
    # Invalid entry
    return (0, $isdir);
  }
  return (2, $isdir) if ($other !~ /^r/);    # not readable
  if ($isdir) {
    #return(2,$isdir) if ($other !~ /x$/); # directory, but not executable
  }
  return (1, $isdir);
 }
-sub check_ftp2_url($$$$)
+sub check_ftp2_url($$$$) {
 {
  my ($protocol, $host, $path, $file) = @_;
  my $checkentry = 1;
-  print "\nhost $host\n";
+
-  print "path $path\n";
+  #print $fe "\nhost $host\n";
-  print "file $file\n";
+  #print $fe "path $path\n";
  #print $fe "file $file\n";
  my $url = "$protocol://$host";
  $path =~ s/\/$//;
  if (defined($file)) {
@ -120,7 +124,8 @@ sub check_ftp2_url($$$$)
  else {
    $url = "$url/$path/.";
  }
-  print "curl $url, file = $file\n";
+
  #print $fe "curl $url, file = $file\n";
  my %listfiles = ();
  if (open(FFTP, "curl --anyauth -l $url|")) {
    while (my $l = <FFTP>) {
@ -148,15 +153,14 @@ sub check_ftp2_url($$$$)
  }
 }
-sub check_ftp_url($$$$)
+sub check_ftp_url($$$$) {
 {
  use Net::FTP;
  my ($protocol, $host, $path, $file) = @_;
  my $res     = 0;
  my $message = "";
-  my $ftp = Net::FTP->new($host, Debug => 0, Timeout => 120);
+  my $ftp = Net::FTP->new($host, Debug => 0, Timeout => 20);
  if (!$ftp) {
    return (3, "Cannot connect to $host");
  }
@ -167,7 +171,8 @@ sub check_ftp_url($$$$)
  else {
    my $rEntries;
    if ($path ne "") {
-      #print "Path = $path\n";
+
      #print $fe "Path = $path\n";
      #if (!$ftp->cwd($path)) {
      # $message = $ftp->message;
      # $res = 3;
@ -185,13 +190,15 @@ sub check_ftp_url($$$$)
      my $found  = 0;
      my $found2 = 0;
      for my $f (@{$rEntries}) {
-	#print "Entry: $path $f\n";
+
        #print $fe "Entry: $path $f\n";
        my ($res1, $isdir) = check_ftp_dir_entry($file, $f);
        if ($res1 == 1) {
          $found = 1;
          last;
        }
        elsif ($res1 == 2) {
          # found, but not accessible
          $found2  = 1;
          $message = "Permission denied for '$file'";
@ -206,12 +213,12 @@ sub check_ftp_url($$$$)
    }
  }
  $ftp->quit;
-  #print "returning ($res,$message)\n";
+
  #print $fe "returning ($res,$message)\n";
  return ($res, $message);
 }
-sub check_unknown_url($$$$)
+sub check_unknown_url($$$$) {
 {
  use LWP::Simple;
  my ($protocol, $host, $path, $file) = @_;
@ -227,16 +234,18 @@ sub check_unknown_url($$$$)
    }
  }
  if (defined($file)) {
-    #print "Trying $url$file\n";
+
    #print $fe "Trying $url$file\n";
    $res = head("$url/$file");
    if (!$res) {
      # try to check for directory '/';
-      #print "Trying $url$file/\n";
+      #print $fe "Trying $url$file/\n";
      $res = head("$url/$file/");
    }
  }
  else {
-    #print "Trying $url\n";
+    #print $fe "Trying $url\n";
    $res = head($url);
  }
  return (!$res);
@ -244,9 +253,10 @@ sub check_unknown_url($$$$)
 #
 # Main entry
-sub check_url($$)
+sub check_url($$$$) {
-{
+  my ($url, $use_curl, $fex, $fsx) = @_;
-  my($url,$use_curl) = @_;
+  $fe = $fex;
  $fs = $fsx;
  my $file = undef;
  my ($protocol, $host, $path);
@ -261,6 +271,7 @@ sub check_url($$)
    if ($path =~ s/\/([^\/]+)$//) {
      $file = $1;
      if ($file =~ / /) {
        # Filename contains ' ', maybe invalid. Don't check
        $file = undef;
      }
@ -268,7 +279,7 @@ sub check_url($$)
    }
  }
  else {
-    print " Invalid url '$url'";
+    print $fe " Invalid url '$url'";
    return 2;
  }
  if ($protocol =~ /^https?$/) {
@ -286,7 +297,7 @@ sub check_url($$)
  }
  else {
    # it never should reach this point
-    print " What protocol is '$protocol'?";
+    print $fe " What protocol is '$protocol'?";
    $res = check_unknown_url($protocol, $host, $path, $file);
    return $res;
  }
--- a/development/checkurls/knownInvalidURLS
+++ b/development/checkurls/knownInvalidURLS
@ -2,3 +2,4 @@ http://www.uon.edu/doe
 ftp://www.test.test
 http://www.test.test
 #proto://host.xx.ab/abcd
 http://example.com/%20foo
--- a/development/checkurls/knownToRegisterURLS
+++ b/development/checkurls/knownToRegisterURLS
@ -10,6 +10,7 @@ http://jasa.peerx-press.org/html/jasa/Using_LaTeX
 http://spie.org/app/Publications/index.cfm?fuseaction=authinfo&type=proceedings
 http://www.jstatsoft.org/downloads/JSSstyle.zip
 http://www.photogrammetry.ethz.ch/tarasp_workshop/isprs.cls
 https://journals.aps.org/revtex
 # The following ftp url is correct, but
 # ftp commands like 'dir', 'get' do not work.
--- a/development/checkurls/search_url.pl
+++ b/development/checkurls/search_url.pl
@ -38,6 +38,8 @@ BEGIN  {
  unshift(@INC, "$p");
 }
 use warnings;
 use Cwd qw(abs_path);
 use CheckURL;
 use Try::Tiny;
 use locale;
@ -46,12 +48,17 @@ use POSIX qw(locale_h);
 setlocale(LC_CTYPE,    "");
 setlocale(LC_MESSAGES, "en_US.UTF-8");
 use File::Temp qw/ tempfile tempdir /;
 use File::Spec;
 use Fcntl qw(:flock SEEK_END);
 # Prototypes
 sub printNotUsedURLS($\%);
 sub replaceSpecialChar($);
 sub readUrls($\%);
 sub parse_file($ );
 sub handle_url($$$ );
 sub printx($$$$);
 ##########
 my %URLS                = ();
@ -67,6 +74,7 @@ for my $arg (@ARGV) {
  die("Bad argument \"$arg\"") if ($arg !~ /=/);
  my ($type, $val) = split("=", $arg);
  if ($type eq "filesToScan") {
    #The file should be a list of files to search in
    if (open(FLIST, $val)) {
      while (my $l = <FLIST>) {
@ -103,6 +111,8 @@ for my $arg (@ARGV) {
 }
 my @urls     = sort keys %URLS, keys %extraURLS;
 my @testvals = ();
 # Tests
 #my @urls = ("ftp://ftp.edpsciences.org/pub/aa/readme.html", "ftp://ftp.springer.de/pub/tex/latex/compsc/proc/author");
 my $errorcount = 0;
@ -128,12 +138,74 @@ for my $u (@urls) {
  }
  next if ($checkSelectedOnly && !defined($selectedURLS{$u}));
  $URLScount++;
-  print "Checking '$u': ";
+  push(@testvals, {u => $u, use_curl => $use_curl,});
 }
 # Ready to go multitasking
 my ($vol, $dir, $file) = File::Spec->splitpath($summaryFile);
 my $tempdir   = tempdir("$dir/CounterXXXXXXX", CLEANUP => 1);
 my $countfile = "$tempdir/counter";
 my $counter   = 0;
 if (open(my $FO, '>', $countfile)) {
  print {$FO} $counter;
  close($FO);
 }
 else {
  unlink($countfile);
  die("Could not write to $countfile");
 }
 print "Using tempdir \"" . abs_path($tempdir) . "\"\n";
 my @wait = ();
 for (my $i = 0; $i < 10; $i++) {    # Number of subprocesses
  my $pid = fork();
  if ($pid == 0) {
    # I am child
    open(my $fe, '>:encoding(UTF-8)', "$tempdir/xxxError$i");
    open(my $fs, '>:encoding(UTF-8)', "$tempdir/xxxSum$i");
    while (1) {
      open(my $fh, '+<', $countfile) or die("cannot open $countfile");
      flock($fh, LOCK_EX)            or die "$i: Cannot lock $countfile - $!\n";
      my $l    = <$fh>;    # get actual count number
      my $diff = undef;
      if (defined($testvals[$l + 150])) {
        $diff = 5;
      }
      elsif (defined($testvals[$l + 50])) {
        $diff = 3;
      }
      elsif (defined($testvals[$l + 20])) {
        $diff = 2;
      }
      elsif (defined($testvals[$l])) {
        $diff = 1;
      }
      else {
        close($fs);
        print $fe "NumberOfErrors $errorcount\n";
        close($fe);
        exit(0);
      }
      my $next = $l + $diff;
      seek($fh, 0, 0);
      truncate($fh, 0);
      print $fh $next;
      close($fh);
      for (my $i = 0; $i < $diff; $i++) {
        my $entryidx = $l + $i;
        my $rentry   = $testvals[$entryidx];
        next if (!defined($rentry));
        my $u        = $rentry->{u};
        my $use_curl = $rentry->{use_curl};
        print $fe "Checking($entryidx) '$u': ";
        my ($res, $prnt, $outSum);
        try {
-    $res = check_url($u, $use_curl);
+          $res = check_url($u, $use_curl, $fe, $fs);
          if ($res) {
-      print "Failed\n";
+            print $fe "Failed\n";
            $prnt   = "";
            $outSum = 1;
          }
@ -147,7 +219,7 @@ for my $u (@urls) {
          $outSum = 1;
          $res    = 700;
        };
-  printx("$prnt", $outSum);
+        printx("$prnt", $outSum, $fe, $fs);
        my $printSourceFiles = 0;
        my $err_txt          = "Error url:";
@ -159,13 +231,13 @@ for my $u (@urls) {
        }
        $res = !$res if (defined($revertedURLS{$u}));
        if ($res || $checkSelectedOnly) {
-    printx("$err_txt \"$u\"\n", $outSum);
+          printx("$err_txt \"$u\"\n", $outSum, $fe, $fs);
        }
        if ($printSourceFiles) {
          if (defined($URLS{$u})) {
            for my $f (sort keys %{$URLS{$u}}) {
              my $lines = ":" . join(',', @{$URLS{$u}->{$f}});
-	printx("  $f$lines\n", $outSum);
+              printx("  $f$lines\n", $outSum, $fe, $fs);
            }
          }
          if ($res) {
@ -173,6 +245,33 @@ for my $u (@urls) {
          }
        }
      }
    }
  }
  $wait[$i] = $pid;
 }
 for (my $i = 0; $i < 10; $i++) {
  my $p = $wait[$i];
  if ($p > 0) {
    waitpid($p, 0);
    open(my $fe, '<', "$tempdir/xxxError$i");
    while (my $l = <$fe>) {
      if ($l =~ /^NumberOfErrors\s(\d+)/) {
        $errorcount += $1;
      }
      else {
        print $l;
      }
    }
    close($fe);
    open(my $fs, '<', "$tempdir/xxxSum$i");
    while (my $l = <$fs>) {
      print SFO $l;
    }
    close($fs);
  }
 }
 unlink($countfile);
 if (%URLS) {
  printNotUsedURLS("Ignored",      %ignoredURLS);
@ -187,17 +286,15 @@ if (defined($summaryFile)) {
 exit($errorcount);
 ###############################################################################
-sub printx($$)
+sub printx($$$$) {
-{
+  my ($txt, $outSum, $fe, $fs) = @_;
-  my ($txt, $outSum) = @_;
+  print $fe "$txt";
  print "$txt";
  if ($outSum && defined($summaryFile)) {
-    print SFO "$txt";
+    print $fs "$txt";
  }
 }
-sub printNotUsedURLS($\%)
+sub printNotUsedURLS($\%) {
 {
  my ($txt, $rURLS) = @_;
  my @msg = ();
  for my $u (sort keys %{$rURLS}) {
@ -215,15 +312,13 @@ sub printNotUsedURLS($\%)
  }
 }
-sub replaceSpecialChar($)
+sub replaceSpecialChar($) {
 {
  my ($l) = @_;
  $l =~ s/\\SpecialChar(NoPassThru)?\s*(TeX|LaTeX|LyX)[\s]?/\2/;
  return ($l);
 }
-sub readUrls($\%)
+sub readUrls($\%) {
 {
  my ($file, $rUrls) = @_;
  die("Could not read file $file") if (!open(ULIST, $file));
@ -245,8 +340,7 @@ sub readUrls($\%)
  close(ULIST);
 }
-sub parse_file($)
+sub parse_file($) {
 {
  my ($f) = @_;
  my $status = "out";    # outside of URL/href
@ -257,6 +351,7 @@ sub parse_file($)
      $line++;
      $l =~ s/[\r\n]+$//;    #  Simulate chomp
      if ($status eq "out") {
        # searching for "\begin_inset Flex URL"
        if ($l =~ /^\s*\\begin_inset\s+Flex\s+URL\s*$/) {
          $status = "inUrlInset";
@ -296,8 +391,7 @@ sub parse_file($)
  }
 }
-sub handle_url($$$)
+sub handle_url($$$) {
 {
  my ($url, $f, $line) = @_;
  $url = &replaceSpecialChar($url);