# -*- mode: perl; -*- package CheckURL; # file CheckURL.pm # # This file is part of LyX, the document processor. # Licence details can be found in the file COPYING. # # authors: Kornel Benko # Scott Kostyshak # # Check if given URL exists and is accessible # use strict; our (@EXPORT, @ISA); BEGIN { use Exporter (); @ISA = qw(Exporter); @EXPORT = qw(check_url constructExtraTestUrl); } # Prototypes sub check_http_url($$$$); sub check_ftp_dir_entry($$); sub check_ftp_url($$$$); sub check_unknown_url($$$$); sub check_url($$$$); ################ my $fe; my $fs; sub check_http_url($$$$) { require LWP::UserAgent; my ($protocol, $host, $path, $file) = @_; my $ua = LWP::UserAgent->new(timeout => 20); my $getp = "/"; if ($path ne "") { $getp .= $path; } if (defined($file)) { if ($getp =~ /\/$/) { $getp .= $file; } else { $getp .= "/$file"; } } my $buf; $ua->agent("Firefox/43.0"); my $response = $ua->get("$protocol://$host$getp"); if ($response->is_success) { $buf = $response->decoded_content; } else { print $fe " " . $response->status_line . ": "; return 3; } my @atitle = (); my %htitle = (); my $res = 0; while ($buf =~ s/\([^\<]*)\<\/title\>//i) { my $title = $1; $title =~ s/[\r\n]/ /g; $title =~ s/ +/ /g; $title =~ s/^ //; $title =~ s/ $//; if (! defined($htitle{$title})) { push(@atitle, $title); $htitle{$title} = 1; } if ($title =~ /Error 404|Not Found/) { print $fe " Page reports 'Not Found' from \"$protocol://$host$getp\": "; $res = 3; } } print $fe "title = \"" . join(': ', @atitle) . "\": "; return $res; } # Returns ($err, $isdir) # returns 0, x if file does not match entry # 1, x everything OK # 2, x if not accesible (permission) sub check_ftp_dir_entry($$) { my ($file, $e) = @_; my $other = '---'; my $isdir = 0; #print $fe "Checking '$file' against '$e'\n"; $file =~ s/^\///; $isdir = 1 if ($e =~ /^d/); return (0, $isdir) if ($e !~ /\s$file$/); if ($e =~ /^.[-rwx]{6}([-rwx]{3})\s/) { $other = $1; } else { #print $fe "Invalid entry\n"; # Invalid entry return (0, $isdir); } return (2, $isdir) if ($other !~ /^r/); # not readable if ($isdir) { #return(2,$isdir) if ($other !~ /x$/); # directory, but not executable } return (1, $isdir); } sub check_ftp2_url($$$$) { my ($protocol, $host, $path, $file) = @_; my $checkentry = 1; #print $fe "\nhost $host\n"; #print $fe "path $path\n"; #print $fe "file $file\n"; my $url = "$protocol://$host"; $path =~ s/\/$//; if (defined($file)) { $url = "$url/$path/$file"; } else { $url = "$url/$path/."; } #print $fe "curl $url, file = $file\n"; my %listfiles = (); if (open(FFTP, "curl --anyauth -l $url|")) { while (my $l = ) { chomp($l); $listfiles{$l} = 1; } close(FFTP); } if (%listfiles) { if (!defined($file)) { return (0, "OK"); } elsif (defined($listfiles{$file})) { return (0, "OK"); } elsif (defined($listfiles{"ftpinfo.txt"})) { return (0, "Probably a directory"); } else { return (1, "Not found"); } } else { return (1, "Error"); } } sub check_ftp_url($$$$) { use Net::FTP; my ($protocol, $host, $path, $file) = @_; my $res = 0; my $message = ""; my $ftp = Net::FTP->new($host, Debug => 0, Timeout => 20); if (!$ftp) { return (3, "Cannot connect to $host"); } if (!$ftp->login("anonymous", '-anonymous@')) { $message = $ftp->message; $res = 3; } else { my $rEntries; if ($path ne "") { #print $fe "Path = $path\n"; #if (!$ftp->cwd($path)) { # $message = $ftp->message; # $res = 3; #} $rEntries = $ftp->dir($path); } else { $rEntries = $ftp->dir(); } if (!$rEntries) { $res = 3; $message = "Could not read directory \"$path\""; } elsif (defined($file)) { my $found = 0; my $found2 = 0; for my $f (@{$rEntries}) { #print $fe "Entry: $path $f\n"; my ($res1, $isdir) = check_ftp_dir_entry($file, $f); if ($res1 == 1) { $found = 1; last; } elsif ($res1 == 2) { # found, but not accessible $found2 = 1; $message = "Permission denied for '$file'"; } } if (!$found) { $res = 4; if (!$found2) { $message = "File or directory '$file' not found"; } } } } $ftp->quit; #print $fe "returning ($res,$message)\n"; return ($res, $message); } sub check_unknown_url($$$$) { use LWP::Simple; my ($protocol, $host, $path, $file) = @_; my $res = 1; my $url = "$protocol://$host"; if ($path ne "") { if ($path =~ /^\//) { $url .= $path; } else { $url .= "/$path"; } } if (defined($file)) { #print $fe "Trying $url$file\n"; $res = head("$url/$file"); if (!$res) { # try to check for directory '/'; #print $fe "Trying $url$file/\n"; $res = head("$url/$file/"); } } else { #print $fe "Trying $url\n"; $res = head($url); } return (!$res); } # # Main entry sub check_url($$$$) { my ($url, $use_curl, $fex, $fsx) = @_; $url =~ s/%20/ /g; $fe = $fex; $fs = $fsx; my $file = undef; my ($protocol, $host, $path); my $res = 0; # Split the url to protocol,host,path if ($url =~ /^([a-z]+):\/\/([^\/]+)(.*)$/) { $protocol = $1; $host = $2; $path = $3; $path =~ s/^\///; if ($path =~ s/\/([^\/]+)$//) { $file = $1; if ($file =~ / /) { # Filename contains ' ', maybe invalid. Don't check $file = undef; } $path .= "/"; } } else { print $fe " Invalid url '$url'"; return 2; } if ($protocol =~ /^https?$/) { return check_http_url($protocol, $host, $path, $file); } elsif ($protocol eq "ftp") { my $message; if ($use_curl) { ($res, $message) = check_ftp2_url($protocol, $host, $path, $file); } else { ($res, $message) = check_ftp_url($protocol, $host, $path, $file); } return $res; } else { # it never should reach this point print $fe " What protocol is '$protocol'?"; $res = check_unknown_url($protocol, $host, $path, $file); return $res; } } sub constructExtraTestUrl($) { my ($url) = @_; my $urlok = $url; my $protokol; if ($urlok =~ s/^(ftp|https?):\/\///) { $protokol = $1; if ($protokol eq 'http') { $protokol = 'https'; } if (($protokol eq 'ftp') && ($urlok =~ /ctan/)) { $protokol = 'https'; } } $urlok =~ s/^([^\/]+)//; my $server = $1; $urlok =~ s/^\///; if ($server =~ /ctan/) { $urlok =~ s/\/\/+/\//g; $urlok =~ s/^ctan\///; if ($urlok =~ /[\w][.](pdf|html|dvi)$/) { if ($urlok =~ s/^(tex-archive|CTAN)\///) { $server = 'mirrors.ctan.org'; } elsif ($urlok =~ /(pgf)\//) { $server = 'www.ctan.org'; } } else { if ($urlok =~ s/\/$//) { $server = 'www.cpan.org'; if ($urlok ne '') { if ("$urlok/" =~ /^(biblio|bibliography|digest|documentation|dviware|fonts|graphics|help|indexing|info|install|languages?|macros|obsolete|support|systems|tds|usergrps|web)\// ) { $urlok = 'tex-archive/' . $urlok; } if ("$urlok/" !~ /^(pkg|topic|tex-archive|author)\//) { die(""); } } } } } my $url2; if ($urlok eq '') { $url2 = "$protokol://$server"; } else { $url2 = "$protokol://$server/$urlok"; } return($url2); } 1;