#! /usr/bin/env perl
# -*- mode: perl; -*-
#
# Examine output of 'xmllint --sax --html'
#
# Usage: <this script> FILE
#
# FILE holds the SAX event trace (and any parser diagnostics) to examine.
# The script tracks the stack of open elements so that every distinct
# 'SAX.error' message can be printed together with the element path at
# which it first occurred.
#
# Exit status:
#   0  no errors were recorded
#   1  at least one error was recorded
# The script dies (non-zero status) if FILE is missing or unreadable, if
# start/end element events do not nest, or if an unrecognized parser error
# is encountered.

use strict;
use warnings;

my $file = $ARGV[0];		# file to examine
if (!defined($file)) {
  die("Usage: $0 <file with output of 'xmllint --sax --html'>\n");
}

# Stack of elements, indexed by nesting depth.
# tag entries = {"name" => string, "count" => sequence number}
# "count" is the number of consecutive siblings with the same name seen
# so far at that depth.
my @tag = ();
my $tagindex = 0;		# next tag to be written
$tag[$tagindex] = {};

# Distinct error messages seen so far; a non-empty hash means exit status 1.
my %errors = ();

# Three-argument open with a lexical handle: the file name can never be
# interpreted as a mode or a pipe, and a failure is reported instead of
# silently yielding a "success" exit status.
open(my $fi, '<', $file) or die("Cannot open '$file': $!\n");

my $line = 0;			# current input line number (for messages)
my %entities = ();		# undefined entities that were already reported
my $saxchartoprint = 0;		# remaining lines to inspect for 'SAX.characters('

while (my $l = <$fi>) {
  $line++;
  chomp($l);
  if ($l =~ /^SAX.startElementNs\(([^, \)]+)/) {
    # new tag
    my $tag = $1;
    $saxchartoprint = 0;
    #print "Start tag $tag at line $line, tagindex = $tagindex\n";
    my $newentry = 1;
    if (defined($tag[$tagindex]->{"name"})) {
      if ($tag[$tagindex]->{"name"} eq $tag) {
        # Same element name as the previous sibling at this depth:
        # only bump its sequence number.
        $tag[$tagindex]->{"count"} = $tag[$tagindex]->{"count"} + 1;
        $newentry = 0;
      }
    }
    if ($newentry) {
      $tag[$tagindex] = {};
      $tag[$tagindex]->{"name"} = $tag;
      $tag[$tagindex]->{"count"} = 1;
    }
    # Descend one level; the child level starts without sibling history.
    $tagindex++;
    $tag[$tagindex] = {};
  }
  elsif ($l =~ /^SAX.endElementNs\(([^, \)]+)/) {
    $saxchartoprint = 0;
    my $tag = $1;
    $tagindex--;
    #print "End tag $tag at line $line, tagindex = $tagindex\n";
    if ($tagindex < 0) {
      die("tagindex < 0");
    }
    if ($tag[$tagindex]->{"name"} ne $tag) {
      die("End tag $tag does not match " . $tag[$tagindex]->{"name"} . " at line $line");
    }
  }
  elsif ($l =~ /^SAX.error: (.*)$/) {
    my $err = $1;
    if ($err =~ /Entity\s+'([a-zA-Z0-9]+)'\s+not defined$/) {
      # Ignore Entity 'xxxx' not defined
    }
    elsif (! defined($errors{$err})) {
      # First occurrence of this message: report it together with the
      # path of currently open elements, e.g. "html(1), body(1), p(3)".
      my $errmsg = "";
      my $trenner = "";		# separator, empty before the first element
      for (my $i = 0; $i < $tagindex; $i++) {
        $errmsg .= $trenner . $tag[$i]->{"name"} . "(" . $tag[$i]->{"count"} . ")";
        $trenner = ", ";
      }
      $errors{$err} = $errmsg;
      print "$err -> $errmsg\n";
      # Inspect the next 3 otherwise unmatched lines and print those
      # starting with 'SAX.characters('
      $saxchartoprint = 3;
    }
  }
  elsif ($l =~ /: parser error :\s+(.*)$/) {
    # The captured message is the error text itself; it is used both as
    # the key recorded in %errors and as the text that is examined and
    # printed below.
    my $err = $1;
    $errors{$err} = $err;
    if ($err =~ /Entity\s+'([a-zA-Z0-9]+)'\s+not defined/) {
      my $entity = $1;
      # Report each undefined entity only once.
      if (! defined($entities{$entity})) {
        $entities{$entity} = 1;
        print "$err\n";
      }
    }
    else {
      print "$l\n";
      die("Unknown error $l");
    }
  }
  elsif ($saxchartoprint > 0) {
    $saxchartoprint--;
    if ($l =~ /^SAX.characters\(([^\)]+)\)/) {
      print "\t$1\n";
    }
  }
}
close($fi);

if (keys %errors) {
  exit(1);
}
exit(0);