From 689a144824199277abf25dc4c0db609d307bab3e Mon Sep 17 00:00:00 2001 From: Kornel Benko Date: Sun, 3 Jan 2016 15:25:09 +0100 Subject: [PATCH] Cmake url tests: Use more sophiticated check for urls. --- development/checkurls/CMakeLists.txt | 3 ++ development/checkurls/CheckURL.pm | 54 +++++++++-------------- development/checkurls/knownToRegisterURLS | 7 +++ development/checkurls/search_url.pl | 15 ++++--- 4 files changed, 42 insertions(+), 37 deletions(-) create mode 100644 development/checkurls/knownToRegisterURLS diff --git a/development/checkurls/CMakeLists.txt b/development/checkurls/CMakeLists.txt index 20af8f9369..debedc67b2 100644 --- a/development/checkurls/CMakeLists.txt +++ b/development/checkurls/CMakeLists.txt @@ -28,6 +28,7 @@ add_test(NAME "check_accessible_urls" "filesToScan=${LYXFILES_FILE}" "ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS" "ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS" + "knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS" "summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedAccessibleURLS.log") # Test inaccessible, but revert the error marker (failed <=> passed) @@ -39,6 +40,7 @@ add_test(NAME "check_inaccessible_urls" "filesToScan=${LYXFILES_FILE}" "selectedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS" "revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS" + "knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS" "summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedInaccessibleURLS.log") # @@ -50,6 +52,7 @@ add_test(NAME "check_invalid_urls" COMMAND ${PERL_EXECUTABLE} "${SEARCH_URL_SCRIPT}" "extraURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS" "revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS" + "knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS" "summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedKnownInvalidURLS.log") set(URL_TEST_NAMES "check_accessible_urls" "check_inaccessible_urls" "check_invalid_urls") diff --git a/development/checkurls/CheckURL.pm b/development/checkurls/CheckURL.pm index b7d0e032d3..d0a8fad65c 100755 --- a/development/checkurls/CheckURL.pm +++ b/development/checkurls/CheckURL.pm @@ -30,26 +30,11 @@ sub check_url($); sub check_http_url($$$$) { - use Net::HTTP; - use Net::HTTPS; + require LWP::UserAgent; my ($protocol, $host, $path, $file) = @_; - my $s; - if ($protocol eq "http") { - $s = Net::HTTP->new(Host => $host, Timeout => 120); - } - elsif ($protocol eq "https") { - $s = Net::HTTPS->new(Host => $host, Timeout => 120); - } - else { - print " Unhandled http protocol \"$protocol\""; - return 3; - } - if (! $s) { - print " " . $@; - return 3; - } + my $ua = LWP::UserAgent->new; my $getp = "/"; if ($path ne "") { $getp .= $path; @@ -62,27 +47,32 @@ sub check_http_url($$$$) $getp .= "/$file"; } } - #print " Trying to use GET => \"$getp\""; - $s->write_request(GET => $getp, 'User-Agent' => "Mozilla/6.0"); - my($code, $mess, %h) = $s->read_response_headers; - - # Try to read something my $buf; - my $n = $s->read_entity_body($buf, 1024); - if (! defined($n)) { - print " Read from \"$protocol://$host$getp\" "; + $ua->agent("Firefox/43.0"); + my $response = $ua->get("$protocol://$host$getp"); + if ($response->is_success) { + $buf = $response->decoded_content; + } + else { + print " " . $response->status_line . ": "; return 3; } - if ($buf =~ /\([^\<]*404[^\<]*)\<\/title\>/i) { + my @title = (); + my $res = 0; + while ($buf =~ s/\([^\<]*)\<\/title\>//i) { my $title = $1; - $title =~ s/\n/ /g; - print "title = \"$title\"\n"; - if ($title =~ /Error 404|404 Not Found/) { - print " Page reports 'Error 404' from \"$protocol://$host$getp\" "; - return 3; + $title =~ s/[\r\n]/ /g; + $title =~ s/ +/ /g; + $title =~ s/^ //; + $title =~ s/ $//; + push(@title, $title); + print "title = \"$title\": "; + if ($title =~ /Error 404|Not Found/) { + print " Page reports 'Not Found' from \"$protocol://$host$getp\": "; + $res = 3; } } - return 0; + return $res; } # Returns ($err, $isdir) diff --git a/development/checkurls/knownToRegisterURLS b/development/checkurls/knownToRegisterURLS new file mode 100644 index 0000000000..5a3fb8174b --- /dev/null +++ b/development/checkurls/knownToRegisterURLS @@ -0,0 +1,7 @@ +# Urls probably exist, but to check +# we need to register and login first +http://www.issn.org/en/node/344 +http://www.springer.de/author/tex/help-journals.html +http://www.wkap.nl/jrnllist.htm/JRNLHOME +http://www.wkap.nl/kaphtml.htm/STYLEFILES + diff --git a/development/checkurls/search_url.pl b/development/checkurls/search_url.pl index 8b900449ab..c656dfe92e 100755 --- a/development/checkurls/search_url.pl +++ b/development/checkurls/search_url.pl @@ -59,6 +59,7 @@ my %ignoredURLS = (); my %revertedURLS = (); my %extraURLS = (); my %selectedURLS = (); +my %knownToRegisterURLS = (); my $summaryFile = undef; my $checkSelectedOnly = 0; @@ -82,11 +83,14 @@ for my $arg (@ARGV) { readUrls($val, %revertedURLS); } elsif ($type eq "extraURLS") { - readUrls($val, %extraURLS); + readUrls($val, %extraURLS); } elsif ($type eq "selectedURLS") { $checkSelectedOnly = 1; - readUrls($val, %selectedURLS); + readUrls($val, %selectedURLS); + } + elsif ($type eq "knownToRegisterURLS") { + readUrls($val, %knownToRegisterURLS); } elsif ($type eq "summaryFile") { if (open(SFO, '>', "$val")) { @@ -104,13 +108,14 @@ my $errorcount = 0; my $URLScount = 0; for my $u (@urls) { - if (defined($selectedURLS{$u})) { - ${selectedURLS}{$u}->{count} += 1; - } if (defined($ignoredURLS{$u})) { $ignoredURLS{$u}->{count} += 1; next; } + next if (defined($knownToRegisterURLS{$u})); + if (defined($selectedURLS{$u})) { + ${selectedURLS}{$u}->{count} += 1; + } next if ($checkSelectedOnly && ! defined($selectedURLS{$u})); $URLScount++; print "Checking '$u': ";