Cmake url tests: Use more sophiticated check for urls.

This commit is contained in:
Kornel Benko 2016-01-03 15:25:09 +01:00
parent d2ec79beac
commit 689a144824
4 changed files with 42 additions and 37 deletions

View File

@ -28,6 +28,7 @@ add_test(NAME "check_accessible_urls"
"filesToScan=${LYXFILES_FILE}"
"ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS"
"ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS"
"knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS"
"summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedAccessibleURLS.log")
# Test inaccessible, but revert the error marker (failed <=> passed)
@ -39,6 +40,7 @@ add_test(NAME "check_inaccessible_urls"
"filesToScan=${LYXFILES_FILE}"
"selectedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS"
"revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS"
"knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS"
"summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedInaccessibleURLS.log")
#
@ -50,6 +52,7 @@ add_test(NAME "check_invalid_urls"
COMMAND ${PERL_EXECUTABLE} "${SEARCH_URL_SCRIPT}"
"extraURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS"
"revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS"
"knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS"
"summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedKnownInvalidURLS.log")
set(URL_TEST_NAMES "check_accessible_urls" "check_inaccessible_urls" "check_invalid_urls")

View File

@ -30,26 +30,11 @@ sub check_url($);
sub check_http_url($$$$)
{
use Net::HTTP;
use Net::HTTPS;
require LWP::UserAgent;
my ($protocol, $host, $path, $file) = @_;
my $s;
if ($protocol eq "http") {
$s = Net::HTTP->new(Host => $host, Timeout => 120);
}
elsif ($protocol eq "https") {
$s = Net::HTTPS->new(Host => $host, Timeout => 120);
}
else {
print " Unhandled http protocol \"$protocol\"";
return 3;
}
if (! $s) {
print " " . $@;
return 3;
}
my $ua = LWP::UserAgent->new;
my $getp = "/";
if ($path ne "") {
$getp .= $path;
@ -62,27 +47,32 @@ sub check_http_url($$$$)
$getp .= "/$file";
}
}
#print " Trying to use GET => \"$getp\"";
$s->write_request(GET => $getp, 'User-Agent' => "Mozilla/6.0");
my($code, $mess, %h) = $s->read_response_headers;
# Try to read something
my $buf;
my $n = $s->read_entity_body($buf, 1024);
if (! defined($n)) {
print " Read from \"$protocol://$host$getp\" ";
$ua->agent("Firefox/43.0");
my $response = $ua->get("$protocol://$host$getp");
if ($response->is_success) {
$buf = $response->decoded_content;
}
else {
print " " . $response->status_line . ": ";
return 3;
}
if ($buf =~ /\<title\>([^\<]*404[^\<]*)\<\/title\>/i) {
my @title = ();
my $res = 0;
while ($buf =~ s/\<title\>([^\<]*)\<\/title\>//i) {
my $title = $1;
$title =~ s/\n/ /g;
print "title = \"$title\"\n";
if ($title =~ /Error 404|404 Not Found/) {
print " Page reports 'Error 404' from \"$protocol://$host$getp\" ";
return 3;
$title =~ s/[\r\n]/ /g;
$title =~ s/ +/ /g;
$title =~ s/^ //;
$title =~ s/ $//;
push(@title, $title);
print "title = \"$title\": ";
if ($title =~ /Error 404|Not Found/) {
print " Page reports 'Not Found' from \"$protocol://$host$getp\": ";
$res = 3;
}
}
return 0;
return $res;
}
# Returns ($err, $isdir)

View File

@ -0,0 +1,7 @@
# Urls probably exist, but to check
# we need to register and login first
http://www.issn.org/en/node/344
http://www.springer.de/author/tex/help-journals.html
http://www.wkap.nl/jrnllist.htm/JRNLHOME
http://www.wkap.nl/kaphtml.htm/STYLEFILES

View File

@ -59,6 +59,7 @@ my %ignoredURLS = ();
my %revertedURLS = ();
my %extraURLS = ();
my %selectedURLS = ();
my %knownToRegisterURLS = ();
my $summaryFile = undef;
my $checkSelectedOnly = 0;
@ -82,11 +83,14 @@ for my $arg (@ARGV) {
readUrls($val, %revertedURLS);
}
elsif ($type eq "extraURLS") {
readUrls($val, %extraURLS);
readUrls($val, %extraURLS);
}
elsif ($type eq "selectedURLS") {
$checkSelectedOnly = 1;
readUrls($val, %selectedURLS);
readUrls($val, %selectedURLS);
}
elsif ($type eq "knownToRegisterURLS") {
readUrls($val, %knownToRegisterURLS);
}
elsif ($type eq "summaryFile") {
if (open(SFO, '>', "$val")) {
@ -104,13 +108,14 @@ my $errorcount = 0;
my $URLScount = 0;
for my $u (@urls) {
if (defined($selectedURLS{$u})) {
${selectedURLS}{$u}->{count} += 1;
}
if (defined($ignoredURLS{$u})) {
$ignoredURLS{$u}->{count} += 1;
next;
}
next if (defined($knownToRegisterURLS{$u}));
if (defined($selectedURLS{$u})) {
${selectedURLS}{$u}->{count} += 1;
}
next if ($checkSelectedOnly && ! defined($selectedURLS{$u}));
$URLScount++;
print "Checking '$u': ";