ctests for broken URLs in URL insets of LyX docs

These tests check for broken URLs in the URL insets of
the manuals, examples, and templates.

The tests are disabled by default because the Perl interpreter
is needed.

Later on they can be activated with a flag, as follows:

  cmake ... -DLYX_ENABLE_URLTESTS=ON

but for now the connection from the TOP-CMakeLists.txt is left out.

Missing part:
        1.) Declaring and setting the option
                LYX_OPTION(ENABLE_URLTESTS  "Enable for URL tests" OFF ALL)
        2.) make the connection
                if(LYX_ENABLE_URLTESTS)
                        add_subdirectory(development/checkurls "${TOP_BINARY_DIR}/checkurls")
                endif()
This commit is contained in:
Kornel Benko 2013-08-28 12:17:40 +02:00
parent ab891a653c
commit 8286411c81
5 changed files with 505 additions and 0 deletions

View File

@ -0,0 +1,49 @@
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.
#
# Copyright (c) 2013 Kornel Benko <kornel@lyx.org>
# (c) 2013 Scott Kostyshak <skotysh@lyx.org>
#
# The url checks are implemented as perl scripts, so a perl
# interpreter is required.
find_package(Perl REQUIRED)
# Collect all *.lyx files below TOP_SEARCH_PATH into a list file
# (one path per line); search_url.pl reads this list.
set(TOP_SEARCH_PATH "${TOP_SRC_DIR}")
set(LYXFILES_FILE "${CMAKE_CURRENT_BINARY_DIR}/filesToScan")
# Truncate the list file before appending.
file(WRITE "${LYXFILES_FILE}")
file(GLOB_RECURSE lyx_files RELATIVE "${TOP_SEARCH_PATH}" "${TOP_SEARCH_PATH}/*.lyx")
foreach(_f ${lyx_files})
file(APPEND "${LYXFILES_FILE}" "${_f}\n")
endforeach()
# The perl script running the actual url checks
set(SEARCH_URL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/search_url.pl")
# Check all urls found in the lyx files, except the known-inaccessible
# ones listed in the file "inaccessibleURLS".
add_test(NAME "check_accessible_urls"
WORKING_DIRECTORY "${TOP_SEARCH_PATH}"
COMMAND ${PERL_EXECUTABLE} "${SEARCH_URL_SCRIPT}"
"filesToScan=${LYXFILES_FILE}"
"ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS")
# Test the inaccessible urls, but revert the error marker (failed <=> passed):
# if this test fails, some url listed in the file inaccessibleURLS has
# become accessible again and should be removed from that file.
add_test(NAME "check_inaccessible_urls"
WORKING_DIRECTORY "${TOP_SEARCH_PATH}"
COMMAND ${PERL_EXECUTABLE} "${SEARCH_URL_SCRIPT}"
"filesToScan=${LYXFILES_FILE}"
"selectedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS"
"revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS")
#
# Test our own deliberately bad urls (file knownInvalidURLS):
# if this test fails, then our test tool itself contains errors.
add_test(NAME "check_invalid_urls"
WORKING_DIRECTORY "${TOP_SEARCH_PATH}"
COMMAND ${PERL_EXECUTABLE} "${SEARCH_URL_SCRIPT}"
"extraURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS"
"revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS")

238
development/checkurls/CheckURL.pm Executable file
View File

@ -0,0 +1,238 @@
# -*- mode: perl; -*-
package CheckURL;
# file CheckURL.pm
#
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.
#
# authors: Kornel Benko <kornel@lyx.org>
#          Scott Kostyshak <skotysh@lyx.org>
#
# Check if given URL exists and is accessible.
#
# Exports the single entry point check_url().
use strict;
use warnings;

our (@EXPORT, @ISA);

BEGIN {
  use Exporter ();
  @ISA    = qw(Exporter);
  @EXPORT = qw(check_url);
}
# Check an http(s) url by issuing a GET request and reading a bit of
# the response body.
# Parameters: $protocol ("http" or "https"), $host, $path (no leading
# '/', possibly ""), $file (basename or undef).
# Returns 0 if the url is accessible, 3 on any failure.
sub check_http_url($$$$)
{
  use Net::HTTP;
  use Net::HTTPS;

  my ($protocol, $host, $path, $file) = @_;

  # Open a connection matching the protocol.
  my $s;
  if ($protocol eq "http") {
    $s = Net::HTTP->new(Host => $host, Timeout => 120);
  }
  elsif ($protocol eq "https") {
    $s = Net::HTTPS->new(Host => $host, Timeout => 120);
  }
  else {
    print " Unhandled http protocol \"$protocol\"";
    return 3;
  }
  if (! $s) {
    # Connect failed (bad hostname, timeout, ...); reason is in $@.
    print " " . $@;
    return 3;
  }

  # Build the absolute request path from $path and $file.
  my $getp = "/";
  if ($path ne "") {
    $getp .= $path;
  }
  if (defined($file)) {
    if ($getp =~ /\/$/) {
      $getp .= $file;
    }
    else {
      $getp .= "/$file";
    }
  }

  $s->write_request(GET => $getp, 'User-Agent' => "Mozilla/5.0");
  my ($code, $mess, %h) = $s->read_response_headers;
  # BUGFIX: the status code was previously ignored, so e.g. a
  # "404 Not Found" response counted as accessible.
  if ($code >= 400) {
    print " Server responded \"$code $mess\" for \"$protocol://$host$getp\"";
    return 3;
  }

  # Try to read some of the body to be sure the url is really served.
  my $buf;
  my $n = $s->read_entity_body($buf, 1024);
  if (! defined($n)) {
    print " Read from \"$protocol://$host$getp\" failed";
    return 3;
  }

  # Explicit success (the original fell off the end of the sub).
  return 0;
}
# Match one 'ls -l'-style ftp directory entry $e against file name $file.
# Returns ($err, $isdir):
#   $err: 0 if the entry does not match $file (or is not parsable)
#         1 if the entry matches and is world-readable
#         2 if the entry matches, but is not accessible (permission)
#   $isdir: 1 if the entry denotes a directory, 0 otherwise
sub check_ftp_dir_entry($$)
{
  my ($file, $e) = @_;
  my $other = '---';
  my $isdir = 0;

  # A leading '/' is not part of the listed name.
  $file =~ s/^\///;
  $isdir = 1 if ($e =~ /^d/);
  # BUGFIX: quote $file with \Q...\E so that regex metacharacters in
  # the name (e.g. '.') are matched literally instead of as wildcards.
  return(0,$isdir) if ($e !~ /\s\Q$file\E$/);
  if ($e =~ /^.[-rwx]{6}([-rwx]{3})\s/) {
    # Permission triple for "other" users.
    $other = $1;
  }
  else {
    # Entry does not look like 'ls -l' output.
    return(0,$isdir);
  }
  return(2,$isdir) if ($other !~ /^r/); # not world-readable
  if ($isdir) {
    # A non-executable directory cannot be entered, but some servers
    # report misleading bits, so this is deliberately not enforced.
    #return(2,$isdir) if ($other !~ /x$/);
  }
  return(1,$isdir);
}
# Check accessibility of an ftp url ($protocol is "ftp" here).
# Logs in anonymously, lists directory $path, and — if $file is
# defined — looks for $file among the returned entries via
# check_ftp_dir_entry().
# Returns ($res, $message):
#   0 - url is accessible
#   3 - connect, login or directory listing failed
#   4 - $file was not found (or not readable) in the listing
sub check_ftp_url($$$$)
{
  use Net::FTP;

  my ($protocol, $host, $path, $file) = @_;
  my $res = 0;
  my $message = "";

  my $ftp = Net::FTP->new($host, Debug => 0, Timeout => 120);
  if(!$ftp) {
    return(3,"Cannot connect to $host");
  }
  if (! $ftp->login("anonymous",'-anonymous@')) {
    # Server rejected the anonymous login.
    $message = $ftp->message;
    $res = 3;
  }
  else {
    my $rEntries;
    if ($path ne "") {
      # List $path directly instead of cwd()+dir() (cwd variant kept
      # below for reference).
      #print "Path = $path\n";
      #if (!$ftp->cwd($path)) {
      # $message = $ftp->message;
      # $res = 3;
      #}
      $rEntries = $ftp->dir($path);
    }
    else {
      $rEntries = $ftp->dir();
    }
    if (! $rEntries) {
      $res = 3;
      $message = "Could not read directory \"$path\"";
    }
    elsif (defined($file)) {
      # Scan the listing for $file.
      my $found = 0;
      my $found2 = 0; # matched, but without read permission
      for my $f ( @{$rEntries}) {
        #print "Entry: $path $f\n";
        my ($res1,$isdir) = &check_ftp_dir_entry($file,$f);
        if ($res1 == 1) {
          $found = 1;
          last;
        }
        elsif ($res1 == 2) {
          # found, but not accessible
          $found2 = 1;
          $message = "Permission denied for '$file'";
        }
      }
      if (! $found) {
        $res = 4;
        if (! $found2) {
          $message = "File or directory '$file' not found";
        }
      }
    }
  }
  # Always close the control connection before returning.
  $ftp->quit;
  #print "returning ($res,$message)\n";
  return($res, $message);
}
# Fallback check for urls with a protocol we do not handle explicitly:
# issue a HEAD request via LWP and report success/failure.
# Returns 0 (false) if the url answered, 1 (true) otherwise.
sub check_unknown_url($$$$)
{
  use LWP::Simple;

  my ($protocol, $host, $path, $file) = @_;

  # Reassemble the url from its parts.
  my $url = "$protocol://$host";
  if ($path ne "") {
    $url .= ($path =~ /^\//) ? $path : "/$path";
  }

  my $answered;
  if (defined($file)) {
    # Probe the file itself first, then retry it as a directory.
    $answered = head("$url/$file");
    $answered = head("$url/$file/") unless $answered;
  }
  else {
    $answered = head($url);
  }
  return(! $answered);
}
#
# Main entry: check a single url string.
# Splits the url into protocol/host/path/file and dispatches to the
# protocol-specific checker. Returns 0 on success, non-zero otherwise.
sub check_url($)
{
  my ($url) = @_;

  # Split the url into protocol, host, path and optional file name.
  my ($protocol, $host, $path, $file);
  if ($url =~ /^([a-z]+):\/\/([^\/]+)(.*)$/) {
    ($protocol, $host, $path) = ($1, $2, $3);
    $path =~ s/^\///;
    if ($path =~ s/\/([^\/]+)$//) {
      $file = $1;
      # A file name containing a blank is suspect; do not check it.
      $file = undef if ($file =~ / /);
      $path .= "/";
    }
  }
  else {
    print " Invalid url '$url'";
    return 2;
  }

  # Dispatch to the matching protocol handler.
  if ($protocol =~ /^https?$/) {
    return check_http_url($protocol, $host, $path, $file);
  }
  if ($protocol eq "ftp") {
    my ($res, $message) = check_ftp_url($protocol, $host, $path, $file);
    return $res;
  }

  # it never should reach this point
  print " What protocol is '$protocol'?";
  return check_unknown_url($protocol, $host, $path, $file);
}

1;

View File

@ -0,0 +1,32 @@
# "Login incorrect."
# permission denied
ftp://ftp.aas.org/pubs
ftp://ftp.springer.de/pub/tex/latex/aa
# Cannot connect to trick.ntp.springer.de
ftp://trick.ntp.springer.de/pub/tex/latex/llncs/latex2e
# file/directory not found
ftp://www.ctan.org/pub/tex-archive/macros/latex2e/contrib/fancyhdr/fancyhdr.pdf
ftp://tug.ctan.org/pub/tex-archive/macros/latex/contrib/caption/caption.pdf
# What protocol is 'htp'
htp://www.lyx.org
# Net::HTTP: connect: timeout
http://www.educat.hu-berlin.de/~voss/lyx
#Net::HTTP: Bad hostname 'math.tulane.edu'
http://math.tulane.edu/~entcs/
# Net::HTTP: Bad hostname 'media.texample.net'
http://media.texample.net/tikz/examples/TEX/free-body-diagrams.tex
# Net::HTTP: Bad hostname 'shelob.ce.ttu.edu'
http://shelob.ce.ttu.edu/daves/faq.html
http://shelob.ce.ttu.edu/daves/lpfaq/faq.html
# Net::HTTP: Bad hostname 'www.devel.lyx.org'
http://www.devel.lyx.org
http://www.devel.lyx.org/
# Net::HTTP: Bad hostname 'www.euskalgnu.org'
http://www.euskalgnu.org
http://www.uon.edu/doe
# Net::HTTP: Bad hostname 'www.it.lyx.org'
http://www.it.lyx.org
# Net::HTTP: Bad hostname 'www.mx.lyx.org'
http://www.mx.lyx.org
# Net::HTTP: Bad hostname 'www.no.lyx.org'
http://www.no.lyx.org

View File

@ -0,0 +1 @@
proto://host.xx.ab/abcd

View File

@ -0,0 +1,185 @@
#! /usr/bin/env perl
# -*- mode: perl; -*-
#
# file search_url.pl
# script to search for url's in lyx files
# and to test their validity.
#
# Syntax: search_url.pl [(filesToScan|(ignored|reverted|extra|selected)URLS)=<path_to_control_file>]*
# Param value is a path to a file containing a list of xxx:
#   filesToScan={xxx = lyx-file-names to be scanned for}
#   ignoredURLS={xxx = urls that are discarded from test}
#   revertedURLS={xxx = urls that should fail, to test the test with invalid urls}
#   extraURLS={xxx = urls which should be also checked}
#   selectedURLS={xxx = urls to be checked exclusively; all others are skipped}
#
# This file is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this software; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Copyright (c) 2013 Kornel Benko <kornel@lyx.org>
#           (c) 2013 Scott Kostyshak <skotysh@lyx.org>

use strict;
use warnings;

BEGIN {
  use File::Spec;
  my $p = File::Spec->rel2abs(__FILE__);
  $p =~ s/[\/\\]?[^\/\\]+$//;
  # Make CheckURL.pm (which resides next to this script) loadable.
  unshift(@INC, "$p");
}

use CheckURL;

# Force English messages in output of the checks.
$ENV{LANG} = "en";
$ENV{LANGUAGE} = "en";

# %URLS maps url => { lyx-file-it-occurs-in => 1 }.
my %URLS = ();
# The following are simple sets (url => 1), filled from control files.
my %ignoredURLS = ();
my %revertedURLS = ();
my %extraURLS = ();
my %selectedURLS = ();
# Set to 1 when a selectedURLS file is given; only those urls are then checked.
my $checkSelectedOnly = 0;
# Parse command line arguments of the form "type=path".
for my $arg (@ARGV) {
  die("Bad argument \"$arg\"") if ($arg !~ /=/);
  # BUGFIX: limit the split to 2 fields, so that a path containing '='
  # is passed through intact.
  my ($type, $val) = split("=", $arg, 2);
  if ($type eq "filesToScan") {
    # The file contains the list of lyx files to search in,
    # one file name per line.
    if (open(my $flist, "<", $val)) {
      while (my $l = <$flist>) {
        chomp($l);
        &parse_file($l);
      }
      close($flist);
    }
  }
  elsif ($type eq "ignoredURLS") {
    &readUrls($val, \%ignoredURLS);
  }
  elsif ($type eq "revertedURLS") {
    &readUrls($val, \%revertedURLS);
  }
  elsif ($type eq "extraURLS") {
    &readUrls($val, \%extraURLS);
  }
  elsif ($type eq "selectedURLS") {
    $checkSelectedOnly = 1;
    &readUrls($val, \%selectedURLS);
  }
  else {
    die("Invalid argument \"$arg\"");
  }
}
# BUGFIX: deduplicate — a url present both in the scanned files and in
# extraURLS was previously checked and counted twice.
my %seenUrl = ();
my @urls = sort grep { ! $seenUrl{$_}++ } (keys %URLS, keys %extraURLS);
my $errorcount = 0;
my $URLScount = 0;

for my $u (@urls) {
  next if (defined($ignoredURLS{$u}));
  next if ($checkSelectedOnly && ! defined($selectedURLS{$u}));
  $URLScount++;
  print "Checking '$u'";
  my $res = &check_url($u);
  if ($res) {
    print ": Failed\n";
  }
  else {
    print ": OK\n";
  }
  my $printSourceFiles = 0;
  my $err_txt = "Error url:";
  if ($res || $checkSelectedOnly) {
    $printSourceFiles = 1;
  }
  if ($res && defined($revertedURLS{$u})) {
    $err_txt = "Failed url:";
  }
  # Reverted urls are expected to fail; invert their result.
  $res = ! $res if (defined($revertedURLS{$u}));
  if ($res || $checkSelectedOnly) {
    print "$err_txt \"$u\"\n";
  }
  if ($printSourceFiles) {
    # List the lyx files the url was found in.
    if (defined($URLS{$u})) {
      for my $f (sort keys %{$URLS{$u}}) {
        print " $f\n";
      }
    }
    if ($res) {
      $errorcount++;
    }
  }
}

print "\n$errorcount URL-tests failed out of $URLScount\n\n";
# Exit status is the number of failures (0 == all passed).
exit($errorcount);
###############################################################################
# Read urls from $file into the hash referenced by $rUrls (url => 1).
# Blank lines and '#' comments are ignored; note that this also clips a
# url at an embedded '#' (fragment), which is intended for these lists.
sub readUrls($$)
{
  my ($file, $rUrls) = @_;

  # Lexical filehandle + 3-arg open (the original used a bareword
  # handle with 2-arg open).
  open(my $ulist, "<", $file) or die("Could not read file $file");
  while (my $l = <$ulist>) {
    $l =~ s/[\r\n]+$//;  # remove eol (also CR of CRLF)
    $l =~ s/\s*\#.*$//;  # remove comment
    next if ($l eq "");
    $rUrls->{$l} = 1;
  }
  close($ulist);
}
# Scan one lyx file $f for "\begin_inset Flex URL" insets and register
# the first url-looking line of each inset via handle_url().
sub parse_file($)
{
  my ($f) = @_;
  my $status = "out"; # outside of URL inset

  # Files in attic directories are obsolete; skip them.
  return if ($f =~ /\/attic\//);

  # An unreadable file is silently skipped (keeps the original
  # best-effort behavior); lexical fh + 3-arg open instead of the
  # original bareword 2-arg open.
  open(my $fi, "<", $f) or return;
  while (my $l = <$fi>) {
    $l =~ s/[\r\n]+$//; # portable chomp (also handles CRLF)
    if ($status eq "out") {
      # searching for "\begin_inset Flex URL"
      if ($l =~ /^\s*\\begin_inset\s+Flex\s+URL\s*$/) {
        $status = "ininset";
      }
    }
    else {
      if ($l =~ /^\s*\\end_(layout|inset)\s*$/) {
        # Inset closed without a url line.
        $status = "out";
      }
      elsif ($l =~ /\s*([a-z]+:\/\/.+)\s*$/) {
        # Take the first url inside the inset and stop looking.
        my $url = $1;
        $status = "out";
        &handle_url($url, $f);
      }
    }
  }
  close($fi);
}
# Record that url $url occurs in lyx file $f.
sub handle_url($$)
{
  my ($url, $f) = @_;

  # Autovivification creates the inner hash on first use, so the
  # explicit defined() check of the original is not needed.
  $URLS{$url}->{$f} = 1;
}