From: Tara L Andrews Date: Thu, 30 Aug 2012 21:38:06 +0000 (+0200) Subject: add exception handling X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=003113289b66dfdde424a3f5f9127c79fa9f5dda;p=scpubgit%2Fstemmatology.git add exception handling --- diff --git a/lib/Text/Tradition/Parser/CTE.pm b/lib/Text/Tradition/Parser/CTE.pm index be6adfc..69acdb5 100644 --- a/lib/Text/Tradition/Parser/CTE.pm +++ b/lib/Text/Tradition/Parser/CTE.pm @@ -3,6 +3,7 @@ package Text::Tradition::Parser::CTE; use strict; use warnings; use Encode qw/ decode /; +use Text::Tradition::Error; use Text::Tradition::Parser::Util qw/ collate_variants /; use XML::LibXML; use XML::LibXML::XPathContext; @@ -131,7 +132,7 @@ sub _remove_formatting { warn "Could not find string or file option to parse"; return; } - + # Second, remove the formatting my $xpc = XML::LibXML::XPathContext->new( $doc->documentElement ); my @useless = $xpc->findnodes( '//hi' ); @@ -147,8 +148,12 @@ sub _remove_formatting { } # Third, write out and reparse to merge the text nodes. - my $result = decode( $doc->encoding, $doc->toString() ); + my $enc = $doc->encoding || 'UTF-8'; + my $result = decode( $enc, $doc->toString() ); my $tei = $parser->parse_string( $result )->documentElement; + unless( $tei->nodeName =~ /^tei(corpus)?$/i ) { + throw( "Parsed document has non-TEI root element " . $tei->nodeName ); + } $xpc = XML::LibXML::XPathContext->new( $tei ); return( $tei, $xpc ); } @@ -416,6 +421,13 @@ sub _add_wit_path { } } +sub throw { + Text::Tradition::Error->throw( + 'ident' => 'Parser::CTE error', + 'message' => $_[0], + ); +} + =back =head1 LICENSE diff --git a/lib/Text/Tradition/Parser/GraphML.pm b/lib/Text/Tradition/Parser/GraphML.pm index a4d5717..63305ac 100644 --- a/lib/Text/Tradition/Parser/GraphML.pm +++ b/lib/Text/Tradition/Parser/GraphML.pm @@ -4,7 +4,7 @@ use strict; use warnings; use Exporter 'import'; use vars qw/ @EXPORT_OK $xpc /; - +use Text::Tradition::Error; use XML::LibXML; use XML::LibXML::XPathContext; @@ -74,9 +74,14 @@ sub graphml_parse { $edgedata->{$keyid} = $keyname; } } + + my @graph_elements = $xpc->findnodes( '/g:graphml/g:graph' ); + unless( @graph_elements ) { + throw( "No graph elements found in graph XML - is this really GraphML?" ); + } my @returned_graphs; - foreach my $graph_el ( $xpc->findnodes( '/g:graphml/g:graph' ) ) { + foreach my $graph_el ( @graph_elements ) { my $graph_hash = { 'nodes' => [], 'edges' => [], 'name' => $graph_el->getAttribute( 'id' ) }; @@ -146,6 +151,13 @@ sub _lookup_node_data { return $data; } +sub throw { + Text::Tradition::Error->throw( + 'ident' => 'Parser::GraphML error', + 'message' => $_[0], + ); +} + =head1 LICENSE This package is free software and is provided "as is" without express diff --git a/lib/Text/Tradition/Parser/TEI.pm b/lib/Text/Tradition/Parser/TEI.pm index 78ae542..8c2c3f0 100644 --- a/lib/Text/Tradition/Parser/TEI.pm +++ b/lib/Text/Tradition/Parser/TEI.pm @@ -2,6 +2,7 @@ package Text::Tradition::Parser::TEI; use strict; use warnings; +use Text::Tradition::Error; use Text::Tradition::Parser::Util qw( collate_variants ); use XML::LibXML; use XML::LibXML::XPathContext; @@ -112,6 +113,9 @@ sub parse { return; } my $tei = $doc->documentElement(); + unless( $tei->nodeName =~ /^tei(corpus)?$/i ) { + throw( "Parsed document has non-TEI root element " . $tei->nodeName ); + } my $xpc = XML::LibXML::XPathContext->new( $tei ); my $ns; if( $tei->namespaceURI ) { @@ -482,6 +486,13 @@ sub _get_sigla { 1; +sub throw { + Text::Tradition::Error->throw( + 'ident' => 'Parser::TEI error', + 'message' => $_[0], + ); +} + =head1 BUGS / TODO =over