X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FParser%2FSelf.pm;h=ae70531b4ee2cdd15d75f1ebe912e623ce6845c9;hb=9fef629bd3a741a6d74d130f10056898d504fb47;hp=ca58c33bd1cf3c812fb27888012847f09c1445ee;hpb=15db7774a381c3ffff41a26bcb9f9e7bc9e65515;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Parser/Self.pm b/lib/Text/Tradition/Parser/Self.pm index ca58c33..ae70531 100644 --- a/lib/Text/Tradition/Parser/Self.pm +++ b/lib/Text/Tradition/Parser/Self.pm @@ -3,6 +3,8 @@ package Text::Tradition::Parser::Self; use strict; use warnings; use Text::Tradition::Parser::GraphML qw/ graphml_parse /; +use Text::Tradition::UserStore; +use TryCatch; =head1 NAME @@ -94,6 +96,8 @@ source of the XML to be parsed. =begin testing +use File::Temp; +use Test::Warn; use Text::Tradition; binmode STDOUT, ":utf8"; binmode STDERR, ":utf8"; @@ -106,26 +110,65 @@ my $t = Text::Tradition->new( 'file' => $tradition, ); -is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" ); +is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" ); if( $t ) { is( scalar $t->collation->readings, 319, "Collation has all readings" ); is( scalar $t->collation->paths, 376, "Collation has all paths" ); is( scalar $t->witnesses, 13, "Collation has all witnesses" ); } +# TODO add a relationship, add a stemma, write graphml, reparse it, check that +# the new data is there +$t->language('Greek'); +$t->add_stemma( 'dotfile' => 't/data/florilegium.dot' ); +$t->collation->add_relationship( 'w12', 'w13', + { 'type' => 'grammatical', 'scope' => 'global', + 'annotation' => 'This is some note' } ); +ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" ); +my $graphml_str = $t->collation->as_graphml; + +my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str ); +is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" ); +if( $newt ) { + is( scalar $newt->collation->readings, 319, "Collation has all readings" ); + is( scalar $newt->collation->paths, 376, "Collation has all paths" ); + is( scalar $newt->witnesses, 13, "Collation has all witnesses" ); + is( scalar $newt->collation->relationships, 1, "Collation has added relationship" ); + is( $newt->language, 'Greek', "Tradition has correct language setting" ); + my $rel = $newt->collation->get_relationship( 'w12', 'w13' ); + ok( $rel, "Found set relationship" ); + is( $rel->annotation, 'This is some note', "Relationship has its properties" ); + is( scalar $newt->stemmata, 1, "Tradition has its stemma" ); + is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" ); +} + +# Test user save / restore +my $fh = File::Temp->new(); +my $file = $fh->filename; +$fh->close; +my $dsn = "dbi:SQLite:dbname=$file"; +my $userstore = Text::Tradition::UserStore->new( { dsn => $dsn, + extra_args => { create => 1 } } ); +my $scope = $userstore->new_scope(); +my $testuser = $userstore->add_user( { url => 'http://example.com' } ); +is( ref( $testuser ), 'Text::Tradition::User', "Created test user via userstore" ); +$testuser->add_tradition( $newt ); +is( $newt->user->id, $testuser->id, "Assigned tradition to test user" ); +$graphml_str = $newt->collation->as_graphml; +my $usert; +warning_is { + $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str ); +} 'DROPPING user assignment without a specified userstore', + "Got expected user drop warning on parse"; +$usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str, + 'userstore' => { 'dsn' => $dsn } ); +is( $usert->user->id, $testuser->id, "Parsed tradition with userstore points to correct user" ); + + =end testing =cut -my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, - $START_KEY, $END_KEY, $LACUNA_KEY, $COMMON_KEY, - $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY, - $SCOPE_KEY, $CORRECT_KEY, $INDEP_KEY ) - = qw/ id text identical rank - is_start is_end is_lacuna is_common - source target witness extra relationship - scope non_correctable non_independent /; - sub parse { my( $tradition, $opts ) = @_; @@ -139,87 +182,139 @@ sub parse { # print STDERR "Setting graph globals\n"; $tradition->name( $graph_data->{'name'} ); my $use_version; + my $tmeta = $tradition->meta; + my $cmeta = $collation->meta; foreach my $gkey ( keys %{$graph_data->{'global'}} ) { my $val = $graph_data->{'global'}->{$gkey}; if( $gkey eq 'version' ) { $use_version = $val; + } elsif( $gkey eq 'stemmata' ) { + # Parse the stemmata into objects + foreach my $dotstr ( split( /\n/, $val ) ) { + $tradition->add_stemma( 'dot' => $dotstr ); + } + } elsif( $gkey eq 'user' ) { + # Assign the tradition to the user if we can + if( exists $opts->{'userstore'} ) { + my $userdir; + try { + $userdir = Text::Tradition::UserStore->new( $opts->{'userstore'} ); + } catch { + warn( "Could not connect to specified user store; DROPPING user assignment" ); + } + my $user = $userdir->find_user( { username => $val } ); + if( $user ) { + $user->add_tradition( $tradition ); + } else { + warn( "Found no user with ID $val; DROPPING user assignment" ); + } + } else { + warn( "DROPPING user assignment without a specified userstore" ); + } + } elsif( $tmeta->has_attribute( $gkey ) ) { + $tradition->$gkey( $val ); } else { $collation->$gkey( $val ); } } - # Add the nodes to the graph. + # Add the nodes to the graph. + # Note any reading IDs that were changed in order to comply with XML + # name restrictions; we have to hardcode start & end. + my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' ); - # print STDERR "Adding graph nodes\n"; + # print STDERR "Adding collation readings\n"; foreach my $n ( @{$graph_data->{'nodes'}} ) { # If it is the start or end node, we already have one, so # grab the rank and go. - next if( defined $n->{$START_KEY} ); - if( defined $n->{$END_KEY} ) { - $collation->end->rank( $n->{$RANK_KEY} ); + next if( defined $n->{'is_start'} ); + if( defined $n->{'is_end'} ) { + $collation->end->rank( $n->{'rank'} ); next; } - - # First extract the data that we can use without reference to - # anything else. - - # Create the node. - my $reading_options = { - 'id' => $n->{$IDKEY}, - 'is_lacuna' => $n->{$LACUNA_KEY}, - 'is_common' => $n->{$COMMON_KEY}, - }; - my $rank = $n->{$RANK_KEY}; - $reading_options->{'rank'} = $rank if $rank; - my $text = $n->{$TOKENKEY}; - $reading_options->{'text'} = $text if $text; - - my $gnode = $collation->add_reading( $reading_options ); + my $gnode = $collation->add_reading( $n ); + if( $gnode->id ne $n->{'id'} ) { + $namechange{$n->{'id'}} = $gnode->id; + } } # Now add the edges. - # print STDERR "Adding graph edges\n"; + # print STDERR "Adding collation path edges\n"; foreach my $e ( @{$graph_data->{'edges'}} ) { - my $from = $e->{$SOURCE_KEY}; - my $to = $e->{$TARGET_KEY}; - - # We need the witness, and whether it is an 'extra' reading path. - my $wit = $e->{$WITNESS_KEY}; - warn "No witness label on path edge!" unless $wit; - my $extra = $e->{$EXTRA_KEY}; - my $label = $wit . ( $extra ? $collation->ac_label : '' ); - $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label ); + my $sourceid = exists $namechange{$e->{'source'}->{'id'}} + ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'}; + my $targetid = exists $namechange{$e->{'target'}->{'id'}} + ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'}; + my $from = $collation->reading( $sourceid ); + my $to = $collation->reading( $targetid ); + + warn "No witness label on path edge!" unless $e->{'witness'}; + my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' ); + $collation->add_path( $from, $to, $label ); + # Add the witness if we don't have it already. - unless( $witnesses{$wit} ) { - $tradition->add_witness( sigil => $wit ); - $witnesses{$wit} = 1; + unless( $witnesses{$e->{'witness'}} ) { + $tradition->add_witness( + sigil => $e->{'witness'}, 'sourcetype' => 'collation' ); + $witnesses{$e->{'witness'}} = 1; } - $tradition->witness( $wit )->is_layered( 1 ) if $extra; + $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'}; } ## Done with the main graph, now look at the relationships. # Nodes are added via the call to add_reading above. We only need # add the relationships themselves. # TODO check that scoping does trt - foreach my $e ( @{$rel_data->{'edges'}} ) { - my $from = $e->{$SOURCE_KEY}; - my $to = $e->{$TARGET_KEY}; - my $relationship_opts = { - 'type' => $e->{$RELATIONSHIP_KEY}, - 'scope' => $e->{$SCOPE_KEY}, - }; - $relationship_opts->{'non_correctable'} = $e->{$CORRECT_KEY} - if exists $e->{$CORRECT_KEY}; - $relationship_opts->{'non_independent'} = $e->{$INDEP_KEY} - if exists $e->{$INDEP_KEY}; - $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, - $relationship_opts ); + $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels + foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) { + my $sourceid = exists $namechange{$e->{'source'}->{'id'}} + ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'}; + my $targetid = exists $namechange{$e->{'target'}->{'id'}} + ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'}; + my $from = $collation->reading( $sourceid ); + my $to = $collation->reading( $targetid ); + delete $e->{'source'}; + delete $e->{'target'}; + # The remaining keys are relationship attributes. + # Backward compatibility... + if( $use_version eq '2.0' || $use_version eq '3.0' ) { + delete $e->{'class'}; + $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'}; + } + # Add the specified relationship unless we already have done. + my $rel_exists; + if( $e->{'scope'} ne 'local' ) { + my $relobj = $collation->get_relationship( $from, $to ); + if( $relobj && $relobj->scope eq $e->{'scope'} + && $relobj->type eq $e->{'type'} ) { + $rel_exists = 1; + } + } + try { + $collation->add_relationship( $from, $to, $e ) unless $rel_exists; + } catch( Text::Tradition::Error $e ) { + warn "DROPPING $from -> $to: " . $e->message; + } } # Save the text for each witness so that we can ensure consistency # later on - $tradition->collation->text_from_paths(); + $collation->text_from_paths(); +} +## Return the relationship that comes first in priority. +my %LAYERS = ( + 'collated' => 1, + 'orthographic' => 2, + 'spelling' => 3, + ); + +sub _layersort_rel { + my( $a, $b ) = @_; + my $key = exists $a->{'type'} ? 'type' : 'relationship'; + my $at = $LAYERS{$a->{$key}} || 99; + my $bt = $LAYERS{$b->{$key}} || 99; + return $at <=> $bt; } 1;