CMake URL tests: Use a more sophisticated check for URLs.

This commit is contained in:
Kornel Benko 2016-01-03 15:25:09 +01:00
parent d2ec79beac
commit 689a144824
4 changed files with 42 additions and 37 deletions

View File

@ -28,6 +28,7 @@ add_test(NAME "check_accessible_urls"
"filesToScan=${LYXFILES_FILE}" "filesToScan=${LYXFILES_FILE}"
"ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS" "ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS"
"ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS" "ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS"
"knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS"
"summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedAccessibleURLS.log") "summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedAccessibleURLS.log")
# Test inaccessible, but revert the error marker (failed <=> passed) # Test inaccessible, but revert the error marker (failed <=> passed)
@ -39,6 +40,7 @@ add_test(NAME "check_inaccessible_urls"
"filesToScan=${LYXFILES_FILE}" "filesToScan=${LYXFILES_FILE}"
"selectedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS" "selectedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS"
"revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS" "revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS"
"knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS"
"summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedInaccessibleURLS.log") "summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedInaccessibleURLS.log")
# #
@ -50,6 +52,7 @@ add_test(NAME "check_invalid_urls"
COMMAND ${PERL_EXECUTABLE} "${SEARCH_URL_SCRIPT}" COMMAND ${PERL_EXECUTABLE} "${SEARCH_URL_SCRIPT}"
"extraURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS" "extraURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS"
"revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS" "revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS"
"knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS"
"summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedKnownInvalidURLS.log") "summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedKnownInvalidURLS.log")
set(URL_TEST_NAMES "check_accessible_urls" "check_inaccessible_urls" "check_invalid_urls") set(URL_TEST_NAMES "check_accessible_urls" "check_inaccessible_urls" "check_invalid_urls")

View File

@ -30,26 +30,11 @@ sub check_url($);
sub check_http_url($$$$) sub check_http_url($$$$)
{ {
use Net::HTTP; require LWP::UserAgent;
use Net::HTTPS;
my ($protocol, $host, $path, $file) = @_; my ($protocol, $host, $path, $file) = @_;
my $s; my $ua = LWP::UserAgent->new;
if ($protocol eq "http") {
$s = Net::HTTP->new(Host => $host, Timeout => 120);
}
elsif ($protocol eq "https") {
$s = Net::HTTPS->new(Host => $host, Timeout => 120);
}
else {
print " Unhandled http protocol \"$protocol\"";
return 3;
}
if (! $s) {
print " " . $@;
return 3;
}
my $getp = "/"; my $getp = "/";
if ($path ne "") { if ($path ne "") {
$getp .= $path; $getp .= $path;
@ -62,27 +47,32 @@ sub check_http_url($$$$)
$getp .= "/$file"; $getp .= "/$file";
} }
} }
#print " Trying to use GET => \"$getp\"";
$s->write_request(GET => $getp, 'User-Agent' => "Mozilla/6.0");
my($code, $mess, %h) = $s->read_response_headers;
# Try to read something
my $buf; my $buf;
my $n = $s->read_entity_body($buf, 1024); $ua->agent("Firefox/43.0");
if (! defined($n)) { my $response = $ua->get("$protocol://$host$getp");
print " Read from \"$protocol://$host$getp\" "; if ($response->is_success) {
$buf = $response->decoded_content;
}
else {
print " " . $response->status_line . ": ";
return 3; return 3;
} }
if ($buf =~ /\<title\>([^\<]*404[^\<]*)\<\/title\>/i) { my @title = ();
my $res = 0;
while ($buf =~ s/\<title\>([^\<]*)\<\/title\>//i) {
my $title = $1; my $title = $1;
$title =~ s/\n/ /g; $title =~ s/[\r\n]/ /g;
print "title = \"$title\"\n"; $title =~ s/ +/ /g;
if ($title =~ /Error 404|404 Not Found/) { $title =~ s/^ //;
print " Page reports 'Error 404' from \"$protocol://$host$getp\" "; $title =~ s/ $//;
return 3; push(@title, $title);
print "title = \"$title\": ";
if ($title =~ /Error 404|Not Found/) {
print " Page reports 'Not Found' from \"$protocol://$host$getp\": ";
$res = 3;
} }
} }
return 0; return $res;
} }
# Returns ($err, $isdir) # Returns ($err, $isdir)

View File

@ -0,0 +1,7 @@
# These URLs probably exist, but to check them
# we would need to register and log in first
http://www.issn.org/en/node/344
http://www.springer.de/author/tex/help-journals.html
http://www.wkap.nl/jrnllist.htm/JRNLHOME
http://www.wkap.nl/kaphtml.htm/STYLEFILES

View File

@ -59,6 +59,7 @@ my %ignoredURLS = ();
my %revertedURLS = (); my %revertedURLS = ();
my %extraURLS = (); my %extraURLS = ();
my %selectedURLS = (); my %selectedURLS = ();
my %knownToRegisterURLS = ();
my $summaryFile = undef; my $summaryFile = undef;
my $checkSelectedOnly = 0; my $checkSelectedOnly = 0;
@ -82,11 +83,14 @@ for my $arg (@ARGV) {
readUrls($val, %revertedURLS); readUrls($val, %revertedURLS);
} }
elsif ($type eq "extraURLS") { elsif ($type eq "extraURLS") {
readUrls($val, %extraURLS); readUrls($val, %extraURLS);
} }
elsif ($type eq "selectedURLS") { elsif ($type eq "selectedURLS") {
$checkSelectedOnly = 1; $checkSelectedOnly = 1;
readUrls($val, %selectedURLS); readUrls($val, %selectedURLS);
}
elsif ($type eq "knownToRegisterURLS") {
readUrls($val, %knownToRegisterURLS);
} }
elsif ($type eq "summaryFile") { elsif ($type eq "summaryFile") {
if (open(SFO, '>', "$val")) { if (open(SFO, '>', "$val")) {
@ -104,13 +108,14 @@ my $errorcount = 0;
my $URLScount = 0; my $URLScount = 0;
for my $u (@urls) { for my $u (@urls) {
if (defined($selectedURLS{$u})) {
${selectedURLS}{$u}->{count} += 1;
}
if (defined($ignoredURLS{$u})) { if (defined($ignoredURLS{$u})) {
$ignoredURLS{$u}->{count} += 1; $ignoredURLS{$u}->{count} += 1;
next; next;
} }
next if (defined($knownToRegisterURLS{$u}));
if (defined($selectedURLS{$u})) {
${selectedURLS}{$u}->{count} += 1;
}
next if ($checkSelectedOnly && ! defined($selectedURLS{$u})); next if ($checkSelectedOnly && ! defined($selectedURLS{$u}));
$URLScount++; $URLScount++;
print "Checking '$u': "; print "Checking '$u': ";