Stop shotgun-parsing XML files, and propagate unexpeced errors. #22
[scpubgit/stemmaweb.git] / lib / stemmaweb / Controller / Root.pm
index 7987334..be1ba72 100644 (file)
@@ -3,6 +3,8 @@ use Moose;
 use namespace::autoclean;
 use Text::Tradition::Analysis qw/ run_analysis /;
 use TryCatch;
+use XML::LibXML;
+use XML::LibXML::XPathContext;
 
 
 BEGIN { extends 'Catalyst::Controller' }
@@ -51,6 +53,17 @@ sub about :Local :Args(0) {
        $c->stash->{template} = 'about.tt';
 }
 
+=head2 help/*
+
+A dispatcher for documentation of various aspects of the application.
+
+=cut
+
+sub help :Local :Args(1) {
+       my( $self, $c, $topic ) = @_;
+       $c->stash->{template} = "$topic.tt";
+}
+
 =head1 Elements of index page
 
 =head2 directory
@@ -121,18 +134,40 @@ sub newtradition :Local :Args(0) {
        my $tradition;
        my $errmsg;
        if( $ext eq 'xml' ) {
-               # Try the different XML parsing options to see if one works.
-               foreach my $type ( qw/ CollateX CTE TEI / ) {
+               my $type;
+               # Parse the XML to see which flavor it is.
+               my $parser = XML::LibXML->new();
+               my $doc;
+               try {
+                       $doc = $parser->parse_file( $newopts{'file'} );
+               } catch( $err ) {
+                       $errmsg = "XML file parsing error: $err";
+               }
+               if( $doc ) {
+                       if( $doc->documentElement->nodeName eq 'GraphML' ) {
+                               $type = 'CollateX';
+                       } elsif( $doc->documentElement->nodeName ne 'TEI' ) {
+                               $errmsg = 'Unrecognized XML type ' . $doc->documentElement->nodeName;
+                       } else {
+                               my $xpc = XML::LibXML::XPathContext->new( $doc->documentElement );
+                               my $venc = $xpc->findvalue( '/TEI/teiHeader/encodingDesc/variantEncoding/attribute::method' );
+                               if( $venc && $venc eq 'double-end-point' ) {
+                                       $type = 'CTE';
+                               } else {
+                                       $type = 'TEI';
+                               }
+                       }
+               }
+               # Try the relevant XML parsing option.
+               if( $type ) {
+                       delete $newopts{'file'};
+                       $newopts{'xmlobj'} = $doc;
                        try {
                                $tradition = Text::Tradition->new( %newopts, 'input' => $type );
                        } catch ( Text::Tradition::Error $e ) {
                                $errmsg = $e->message;
-                       } catch {
-                               $errmsg = "Unexpected parsing error";
-                       }
-                       if( $tradition ) {
-                               $errmsg = undef;
-                               last;
+                       } catch ( $e ) {
+                               $errmsg = "Unexpected parsing error: $e";
                        }
                }
        } elsif( $ext =~ /^(txt|csv|xls(x)?)$/ ) {
@@ -150,8 +185,8 @@ sub newtradition :Local :Args(0) {
                                );
                } catch ( Text::Tradition::Error $e ) {
                        $errmsg = $e->message;
-               } catch {
-                       $errmsg = "Unexpected parsing error";
+               } catch ( $e ) {
+                       $errmsg = "Unexpected parsing error: $e";
                }
        } else {
                # Error unless we have a recognized filename extension
@@ -220,7 +255,8 @@ sub textinfo :Local :Args(1) {
                }
                # Handle language param, making Default => null
                my $langval = delete $params->{language} || 'Default';
-               unless( $tradition->language eq $langval ) {
+               
+               unless( $tradition->language eq $langval || !$tradition->can('language') ) {
                        try {
                                $tradition->language( $langval );
                                $changed = 1;
@@ -275,11 +311,14 @@ sub textinfo :Local :Args(1) {
        my $textinfo = {
                textid => $textid,
                name => $tradition->name,
-               language => $tradition->language,
+               #language => $tradition->language,
                public => $tradition->public || 0,
                owner => $tradition->user ? $tradition->user->email : undef,
                witnesses => [ map { $_->sigil } $tradition->witnesses ],
        };
+       if( $tradition->can('language') ) {
+               $textinfo->{'language'} = $tradition->language;
+       }
        my @stemmasvg = map { $_->as_svg() } $tradition->stemmata;
        map { $_ =~ s/\n/ /mg } @stemmasvg;
        $textinfo->{stemmata} = \@stemmasvg;
@@ -336,6 +375,11 @@ sub stemma :Local :Args(2) {
        if( $c->req->method eq 'POST' ) {
                if( $ok eq 'full' ) {
                        my $dot = $c->request->body_params->{'dot'};
+                       # Graph::Reader::Dot does not handle bare unicode. We get around this
+                       # by wrapping all words in double quotes, as long as they aren't already
+                       # wrapped, and as long as they aren't the initial 'digraph stemma'.
+                       # Horrible HACK.
+                       $dot =~ s/(?<!")\b(?!(?:digraph|stemma)\b)(\w+)\b(?!")/"$1"/g;
                        try {
                                if( $stemmaid eq 'n' ) {
                                        # We are adding a new stemma.
@@ -418,6 +462,30 @@ sub stemmadot :Local :Args(2) {
        $c->forward('View::JSON');
 }
 
+=head2 download
+
+ GET /download/$textid
+Returns the full XML definition of the tradition and its stemmata, if any.
+=cut
+
+sub download :Local :Args(1) {
+       my( $self, $c, $textid ) = @_;
+       my $tradition = $c->model('Directory')->tradition( $textid );
+       unless( $tradition ) {
+               return _json_error( $c, 404, "No tradition with ID $textid" );
+       }
+       my $ok = _check_permission( $c, $tradition );
+       return unless $ok;
+       try {
+               $c->stash->{'result'} = $tradition->collation->as_graphml();
+       } catch( Text::Tradition::Error $e ) {
+               return _json_error( $c, 500, $e->message );
+       }
+       $c->forward('View::GraphML');
+}
+
 ####################
 ### Helper functions
 ####################