mirror of
https://git.lyx.org/repos/lyx.git
synced 2024-12-22 05:16:21 +00:00
ctests for broken URLs in URL insets of LyX docs
These tests check for broken URLs in the URL insets of the manuals, examples, and templates. The tests are disabled by default because the Perl interpreter is needed. Later on they can be activated with a flag, as follows: cmake ... -DLYX_ENABLE_URLTESTS=ON but for now the connection from the TOP-CMakeLists.txt is left out. Missing part: 1.) Declaring an setting the option LYX_OPTION(ENABLE_URLTESTS "Enable for URL tests" OFF ALL) 2.) make the connection if(LYX_ENABLE_URLTESTS) add_subdirectory(development/checkurls "${TOP_BINARY_DIR}/checkurls") endif()
This commit is contained in:
parent
ab891a653c
commit
8286411c81
49
development/checkurls/CMakeLists.txt
Normal file
49
development/checkurls/CMakeLists.txt
Normal file
@ -0,0 +1,49 @@
|
||||
# This file is part of LyX, the document processor.
|
||||
# Licence details can be found in the file COPYING.
|
||||
#
|
||||
# Copyright (c) 2013 Kornel Benko <kornel@lyx.org>
|
||||
# (c) 2013 Scott Kostyshak <skotysh@lyx.org>
|
||||
#
|
||||
# Needed, because of perl scripts here
|
||||
find_package(Perl REQUIRED)
|
||||
|
||||
# create file the lyx-files-list
|
||||
set(TOP_SEARCH_PATH "${TOP_SRC_DIR}")
|
||||
set(LYXFILES_FILE "${CMAKE_CURRENT_BINARY_DIR}/filesToScan")
|
||||
file(WRITE "${LYXFILES_FILE}")
|
||||
file(GLOB_RECURSE lyx_files RELATIVE "${TOP_SEARCH_PATH}" "${TOP_SEARCH_PATH}/*.lyx")
|
||||
|
||||
foreach(_f ${lyx_files})
|
||||
file(APPEND "${LYXFILES_FILE}" "${_f}\n")
|
||||
endforeach()
|
||||
|
||||
# Define the perl-script running the actual test
|
||||
set(SEARCH_URL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/search_url.pl")
|
||||
|
||||
# Test all but inaccessible
|
||||
add_test(NAME "check_accessible_urls"
|
||||
WORKING_DIRECTORY "${TOP_SEARCH_PATH}"
|
||||
COMMAND ${PERL_EXECUTABLE} "${SEARCH_URL_SCRIPT}"
|
||||
"filesToScan=${LYXFILES_FILE}"
|
||||
"ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS")
|
||||
|
||||
# Test inaccessible, but revert the error marker (failed <=> passed)
|
||||
# if this fails, then some url is accessible and does not belong
|
||||
# to file inaccessibleURLS
|
||||
add_test(NAME "check_inaccessible_urls"
|
||||
WORKING_DIRECTORY "${TOP_SEARCH_PATH}"
|
||||
COMMAND ${PERL_EXECUTABLE} "${SEARCH_URL_SCRIPT}"
|
||||
"filesToScan=${LYXFILES_FILE}"
|
||||
"selectedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS"
|
||||
"revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS")
|
||||
|
||||
#
|
||||
# Test our own bad URLs
|
||||
# if this test fails, then our testtool contains errors
|
||||
add_test(NAME "check_invalid_urls"
|
||||
WORKING_DIRECTORY "${TOP_SEARCH_PATH}"
|
||||
COMMAND ${PERL_EXECUTABLE} "${SEARCH_URL_SCRIPT}"
|
||||
"extraURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS"
|
||||
"revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS")
|
||||
|
||||
|
238
development/checkurls/CheckURL.pm
Executable file
238
development/checkurls/CheckURL.pm
Executable file
@ -0,0 +1,238 @@
|
||||
# -*- mode: perl; -*-
|
||||
package CheckURL;
|
||||
# file CheckURL.pm
|
||||
#
|
||||
# This file is part of LyX, the document processor.
|
||||
# Licence details can be found in the file COPYING.
|
||||
#
|
||||
# authors: Kornel Benko <kornel@lyx.org>
|
||||
# Scott Kostyshak <skotysh@lyx.org>
|
||||
#
|
||||
|
||||
# Check if given URL exists and is accessible
|
||||
#
|
||||
use strict;
|
||||
|
||||
our(@EXPORT, @ISA);
|
||||
BEGIN {
|
||||
use Exporter ();
|
||||
@ISA = qw(Exporter);
|
||||
@EXPORT = qw(check_url);
|
||||
}
|
||||
|
||||
sub check_http_url($$$$)
|
||||
{
|
||||
use Net::HTTP;
|
||||
use Net::HTTPS;
|
||||
|
||||
my ($protocol, $host, $path, $file) = @_;
|
||||
|
||||
my $s;
|
||||
if ($protocol eq "http") {
|
||||
$s = Net::HTTP->new(Host => $host, Timeout => 120);
|
||||
}
|
||||
elsif ($protocol eq "https") {
|
||||
$s = Net::HTTPS->new(Host => $host, Timeout => 120);
|
||||
}
|
||||
else {
|
||||
print " Unhandled http protocol \"$protocol\"";
|
||||
return 3;
|
||||
}
|
||||
if (! $s) {
|
||||
print " " . $@;
|
||||
return 3;
|
||||
}
|
||||
my $getp = "/";
|
||||
if ($path ne "") {
|
||||
$getp .= $path;
|
||||
}
|
||||
if (defined($file)) {
|
||||
if ($getp =~ /\/$/) {
|
||||
$getp .= $file;
|
||||
}
|
||||
else {
|
||||
$getp .= "/$file";
|
||||
}
|
||||
}
|
||||
#print " Trying to use GET => \"$getp\"";
|
||||
$s->write_request(GET => $getp, 'User-Agent' => "Mozilla/5.0");
|
||||
my($code, $mess, %h) = $s->read_response_headers;
|
||||
|
||||
# Try to read something
|
||||
my $buf;
|
||||
my $n = $s->read_entity_body($buf, 1024);
|
||||
if (! defined($n)) {
|
||||
print " Read from \"$protocol://$host$getp\" failed";
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
||||
# Returns ($err, $isdir)
|
||||
# returns 0, x if file does not match entry
|
||||
# 1, x everything OK
|
||||
# 2, x if not accesible (permission)
|
||||
sub check_ftp_dir_entry($$)
|
||||
{
|
||||
my ($file, $e) = @_;
|
||||
my $other = '---';
|
||||
my $isdir = 0;
|
||||
|
||||
#print "Checking '$file' against '$e'\n";
|
||||
$file =~ s/^\///;
|
||||
$isdir = 1 if ($e =~ /^d/);
|
||||
return(0,$isdir) if ($e !~ /\s$file$/);
|
||||
if ($e =~ /^.[-rwx]{6}([-rwx]{3})\s/) {
|
||||
$other = $1;
|
||||
}
|
||||
else {
|
||||
#print "Invalid entry\n";
|
||||
# Invalid entry
|
||||
return(0,$isdir);
|
||||
}
|
||||
return(2,$isdir) if ($other !~ /^r/); # not readable
|
||||
if ($isdir) {
|
||||
#return(2,$isdir) if ($other !~ /x$/); # directory, but not executable
|
||||
}
|
||||
return(1,$isdir);
|
||||
}
|
||||
|
||||
sub check_ftp_url($$$$)
|
||||
{
|
||||
use Net::FTP;
|
||||
|
||||
my ($protocol, $host, $path, $file) = @_;
|
||||
my $res = 0;
|
||||
my $message = "";
|
||||
|
||||
my $ftp = Net::FTP->new($host, Debug => 0, Timeout => 120);
|
||||
if(!$ftp) {
|
||||
return(3,"Cannot connect to $host");
|
||||
}
|
||||
if (! $ftp->login("anonymous",'-anonymous@')) {
|
||||
$message = $ftp->message;
|
||||
$res = 3;
|
||||
}
|
||||
else {
|
||||
my $rEntries;
|
||||
if ($path ne "") {
|
||||
#print "Path = $path\n";
|
||||
#if (!$ftp->cwd($path)) {
|
||||
# $message = $ftp->message;
|
||||
# $res = 3;
|
||||
#}
|
||||
$rEntries = $ftp->dir($path);
|
||||
}
|
||||
else {
|
||||
$rEntries = $ftp->dir();
|
||||
}
|
||||
if (! $rEntries) {
|
||||
$res = 3;
|
||||
$message = "Could not read directory \"$path\"";
|
||||
}
|
||||
elsif (defined($file)) {
|
||||
my $found = 0;
|
||||
my $found2 = 0;
|
||||
for my $f ( @{$rEntries}) {
|
||||
#print "Entry: $path $f\n";
|
||||
my ($res1,$isdir) = &check_ftp_dir_entry($file,$f);
|
||||
if ($res1 == 1) {
|
||||
$found = 1;
|
||||
last;
|
||||
}
|
||||
elsif ($res1 == 2) {
|
||||
# found, but not accessible
|
||||
$found2 = 1;
|
||||
$message = "Permission denied for '$file'";
|
||||
}
|
||||
}
|
||||
if (! $found) {
|
||||
$res = 4;
|
||||
if (! $found2) {
|
||||
$message = "File or directory '$file' not found";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
$ftp->quit;
|
||||
#print "returning ($res,$message)\n";
|
||||
return($res, $message);
|
||||
}
|
||||
|
||||
sub check_unknown_url($$$$)
|
||||
{
|
||||
use LWP::Simple;
|
||||
|
||||
my ($protocol, $host, $path, $file) = @_;
|
||||
my $res = 1;
|
||||
|
||||
my $url = "$protocol://$host";
|
||||
if ($path ne "") {
|
||||
if ($path =~ /^\//) {
|
||||
$url .= $path;
|
||||
}
|
||||
else {
|
||||
$url .= "/$path";
|
||||
}
|
||||
}
|
||||
if(defined($file)) {
|
||||
#print "Trying $url$file\n";
|
||||
$res = head("$url/$file");
|
||||
if(! $res) {
|
||||
# try to check for directory '/';
|
||||
#print "Trying $url$file/\n";
|
||||
$res = head("$url/$file/");
|
||||
}
|
||||
}
|
||||
else {
|
||||
#print "Trying $url\n";
|
||||
$res = head($url);
|
||||
}
|
||||
return(! $res);
|
||||
}
|
||||
|
||||
#
|
||||
# Main entry
|
||||
sub check_url($)
|
||||
{
|
||||
my($url) = @_;
|
||||
my $file = undef;
|
||||
my ($protocol,$host,$path);
|
||||
|
||||
my $res = 0;
|
||||
|
||||
# Split the url to protocol,host,path
|
||||
if ($url =~ /^([a-z]+):\/\/([^\/]+)(.*)$/) {
|
||||
$protocol = $1;
|
||||
$host = $2;
|
||||
$path = $3;
|
||||
$path =~ s/^\///;
|
||||
if($path =~ s/\/([^\/]+)$//) {
|
||||
$file = $1;
|
||||
if($file =~ / /) {
|
||||
# Filename contains ' ', maybe invalid. Don't check
|
||||
$file = undef;
|
||||
}
|
||||
$path .= "/";
|
||||
}
|
||||
}
|
||||
else {
|
||||
print " Invalid url '$url'";
|
||||
return 2;
|
||||
}
|
||||
if ($protocol =~ /^https?$/) {
|
||||
return &check_http_url($protocol, $host, $path, $file);
|
||||
}
|
||||
elsif ($protocol eq "ftp") {
|
||||
my $message;
|
||||
($res, $message) = &check_ftp_url($protocol, $host, $path, $file);
|
||||
return $res;
|
||||
}
|
||||
else {
|
||||
# it never should reach this point
|
||||
print " What protocol is '$protocol'?";
|
||||
$res = &check_unknown_url($protocol, $host, $path, $file);
|
||||
return $res;
|
||||
}
|
||||
}
|
||||
|
||||
1;
|
32
development/checkurls/inaccessibleURLS
Normal file
32
development/checkurls/inaccessibleURLS
Normal file
@ -0,0 +1,32 @@
|
||||
# "Login incorrect."
|
||||
# permission denied
|
||||
ftp://ftp.aas.org/pubs
|
||||
ftp://ftp.springer.de/pub/tex/latex/aa
|
||||
# Cannot connect to trick.ntp.springer.de
|
||||
ftp://trick.ntp.springer.de/pub/tex/latex/llncs/latex2e
|
||||
# file/directory not found
|
||||
ftp://www.ctan.org/pub/tex-archive/macros/latex2e/contrib/fancyhdr/fancyhdr.pdf
|
||||
ftp://tug.ctan.org/pub/tex-archive/macros/latex/contrib/caption/caption.pdf
|
||||
# What protocol is 'htp'
|
||||
htp://www.lyx.org
|
||||
# Net::HTTP: connect: timeout
|
||||
http://www.educat.hu-berlin.de/~voss/lyx
|
||||
#Net::HTTP: Bad hostname 'math.tulane.edu'
|
||||
http://math.tulane.edu/~entcs/
|
||||
# Net::HTTP: Bad hostname 'media.texample.net'
|
||||
http://media.texample.net/tikz/examples/TEX/free-body-diagrams.tex
|
||||
# Net::HTTP: Bad hostname 'shelob.ce.ttu.edu'
|
||||
http://shelob.ce.ttu.edu/daves/faq.html
|
||||
http://shelob.ce.ttu.edu/daves/lpfaq/faq.html
|
||||
# Net::HTTP: Bad hostname 'www.devel.lyx.org'
|
||||
http://www.devel.lyx.org
|
||||
http://www.devel.lyx.org/
|
||||
# Net::HTTP: Bad hostname 'www.euskalgnu.org'
|
||||
http://www.euskalgnu.org
|
||||
http://www.uon.edu/doe
|
||||
# Net::HTTP: Bad hostname 'www.it.lyx.org'
|
||||
http://www.it.lyx.org
|
||||
# Net::HTTP: Bad hostname 'www.mx.lyx.org'
|
||||
http://www.mx.lyx.org
|
||||
# Net::HTTP: Bad hostname 'www.no.lyx.org'
|
||||
http://www.no.lyx.org
|
1
development/checkurls/knownInvalidURLS
Normal file
1
development/checkurls/knownInvalidURLS
Normal file
@ -0,0 +1 @@
|
||||
proto://host.xx.ab/abcd
|
185
development/checkurls/search_url.pl
Executable file
185
development/checkurls/search_url.pl
Executable file
@ -0,0 +1,185 @@
|
||||
#! /usr/bin/env perl
|
||||
# -*- mode: perl; -*-
|
||||
#
|
||||
# file search_url.pl
|
||||
# script to search for url's in lyxfiles
|
||||
# and testing their validity.
|
||||
#
|
||||
# Syntax: search_url.pl [(filesToScan|(ignored|reverted|extra|selected)URLS)={path_to_control]*
|
||||
# Param value is a path to a file containing list of xxx:
|
||||
# filesToScan={xxx = lyx-file-names to be scanned for}
|
||||
# ignoredURLS={xxx = urls that are discarded from test}
|
||||
# revertedURLS={xxx = urls that should fail, to test the test with invalid urls}
|
||||
# extraURLS={xxx = urls which should be also checked}
|
||||
#
|
||||
# This file is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This software is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public
|
||||
# License along with this software; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
#
|
||||
# Copyright (c) 2013 Kornel Benko <kornel@lyx.org>
|
||||
# (c) 2013 Scott Kostyshak <skotysh@lyx.org>
|
||||
|
||||
use strict;
|
||||
|
||||
BEGIN {
|
||||
use File::Spec;
|
||||
my $p = File::Spec->rel2abs(__FILE__);
|
||||
$p =~ s/[\/\\]?[^\/\\]+$//;
|
||||
unshift(@INC, "$p");
|
||||
}
|
||||
|
||||
use CheckURL;
|
||||
|
||||
$ENV{LANG} = "en";
|
||||
$ENV{LANGUAGE} = "en";
|
||||
|
||||
my %URLS = ();
|
||||
my %ignoredURLS = ();
|
||||
my %revertedURLS = ();
|
||||
my %extraURLS = ();
|
||||
my %selectedURLS = ();
|
||||
|
||||
my $checkSelectedOnly = 0;
|
||||
for my $arg (@ARGV) {
|
||||
die("Bad argument \"$arg\"") if ($arg !~ /=/);
|
||||
my ($type,$val) = split("=", $arg);
|
||||
if ($type eq "filesToScan") {
|
||||
#The file should be a list of files to search in
|
||||
if (open(FLIST, $val)) {
|
||||
while (my $l = <FLIST>) {
|
||||
chomp($l);
|
||||
&parse_file($l);
|
||||
}
|
||||
close(FLIST);
|
||||
}
|
||||
}
|
||||
elsif ($type eq "ignoredURLS") {
|
||||
&readUrls($val, \%ignoredURLS);
|
||||
}
|
||||
elsif ($type eq "revertedURLS") {
|
||||
&readUrls($val, \%revertedURLS);
|
||||
}
|
||||
elsif ($type eq "extraURLS") {
|
||||
&readUrls($val, \%extraURLS);
|
||||
}
|
||||
elsif ($type eq "selectedURLS") {
|
||||
$checkSelectedOnly = 1;
|
||||
&readUrls($val, \%selectedURLS);
|
||||
}
|
||||
else {
|
||||
die("Invalid argument \"$arg\"");
|
||||
}
|
||||
}
|
||||
|
||||
my @urls = sort keys %URLS, keys %extraURLS;
|
||||
my $errorcount = 0;
|
||||
|
||||
my $URLScount = 0;
|
||||
|
||||
for my $u (@urls) {
|
||||
next if (defined($ignoredURLS{$u}));
|
||||
next if ($checkSelectedOnly && ! defined(${selectedURLS}{$u}));
|
||||
$URLScount++;
|
||||
print "Checking '$u'";
|
||||
my $res = &check_url($u);
|
||||
if ($res) {
|
||||
print ": Failed\n";
|
||||
}
|
||||
else {
|
||||
print ": OK\n";
|
||||
}
|
||||
my $printSourceFiles = 0;
|
||||
my $err_txt = "Error url:";
|
||||
|
||||
if ($res || $checkSelectedOnly) {
|
||||
$printSourceFiles = 1;
|
||||
}
|
||||
if ($res && defined($revertedURLS{$u})) {
|
||||
$err_txt = "Failed url:";
|
||||
}
|
||||
$res = ! $res if (defined($revertedURLS{$u}));
|
||||
if ($res || $checkSelectedOnly) {
|
||||
print "$err_txt \"$u\"\n";
|
||||
}
|
||||
if ($printSourceFiles) {
|
||||
if (defined($URLS{$u})) {
|
||||
for my $f(sort keys %{$URLS{$u}}) {
|
||||
print " $f\n";
|
||||
}
|
||||
}
|
||||
if ($res ) {
|
||||
$errorcount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
print "\n$errorcount URL-tests failed out of $URLScount\n\n";
|
||||
exit($errorcount);
|
||||
|
||||
###############################################################################
|
||||
|
||||
sub readUrls($$)
|
||||
{
|
||||
my ($file, $rUrls) = @_;
|
||||
|
||||
die("Could not read file $file") if (! open(ULIST, $file));
|
||||
while (my $l = <ULIST>) {
|
||||
$l =~ s/[\r\n]+$//; # remove eol
|
||||
$l =~ s/\s*\#.*$//; # remove comment
|
||||
next if ($l eq "");
|
||||
$rUrls->{$l} = 1;
|
||||
}
|
||||
close(ULIST);
|
||||
}
|
||||
|
||||
sub parse_file($)
|
||||
{
|
||||
my($f) = @_;
|
||||
my $status = "out"; # outside of URL
|
||||
|
||||
return if ($f =~ /\/attic\//);
|
||||
if(open(FI, $f)) {
|
||||
while(my $l = <FI>) {
|
||||
$l =~ s/[\r\n]+$//; # Simulate chomp
|
||||
if($status eq "out") {
|
||||
# searching for "\begin_inset Flex URL"
|
||||
if($l =~ /^\s*\\begin_inset\s+Flex\s+URL\s*$/) {
|
||||
$status = "ininset";
|
||||
}
|
||||
}
|
||||
else {
|
||||
if($l =~ /^\s*\\end_(layout|inset)\s*$/) {
|
||||
$status = "out";
|
||||
}
|
||||
else {
|
||||
if($l =~ /\s*([a-z]+:\/\/.+)\s*$/) {
|
||||
my $url = $1;
|
||||
$status = "out";
|
||||
&handle_url($url, $f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
close(FI);
|
||||
}
|
||||
}
|
||||
|
||||
sub handle_url($$)
|
||||
{
|
||||
my($url, $f) = @_;
|
||||
|
||||
if(!defined($URLS{$url})) {
|
||||
$URLS{$url} = {};
|
||||
}
|
||||
$URLS{$url}->{$f} = 1;
|
||||
}
|
Loading…
Reference in New Issue
Block a user