X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FParser%2FSelf.pm;h=ca58c33bd1cf3c812fb27888012847f09c1445ee;hb=15db7774a381c3ffff41a26bcb9f9e7bc9e65515;hp=7bc2c6d5abbd36453df9c2d5bc71bc1584aece3c;hpb=b74d89f9e926466ba4ded77746fd0f98912cc17a;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Parser/Self.pm b/lib/Text/Tradition/Parser/Self.pm index 7bc2c6d..ca58c33 100644 --- a/lib/Text/Tradition/Parser/Self.pm +++ b/lib/Text/Tradition/Parser/Self.pm @@ -109,7 +109,7 @@ my $t = Text::Tradition->new( is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" ); if( $t ) { is( scalar $t->collation->readings, 319, "Collation has all readings" ); - is( scalar $t->collation->paths, 2854, "Collation has all paths" ); + is( scalar $t->collation->paths, 376, "Collation has all paths" ); is( scalar $t->witnesses, 13, "Collation has all witnesses" ); } @@ -117,110 +117,109 @@ if( $t ) { =cut -my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY, - $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY ) - = qw/ name reading identical rank class - source target witness extra relationship/; +my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, + $START_KEY, $END_KEY, $LACUNA_KEY, $COMMON_KEY, + $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY, + $SCOPE_KEY, $CORRECT_KEY, $INDEP_KEY ) + = qw/ id text identical rank + is_start is_end is_lacuna is_common + source target witness extra relationship + scope non_correctable non_independent /; sub parse { my( $tradition, $opts ) = @_; - my $graph_data = graphml_parse( $opts ); + + # Collation data is in the first graph; relationship-specific stuff + # is in the second. + my( $graph_data, $rel_data ) = graphml_parse( $opts ); my $collation = $tradition->collation; my %witnesses; - # Set up the graph-global attributes. They will appear in the - # hash under their accessor names. - print STDERR "Setting graph globals\n"; + # print STDERR "Setting graph globals\n"; $tradition->name( $graph_data->{'name'} ); - foreach my $gkey ( keys %{$graph_data->{'attr'}} ) { - my $val = $graph_data->{'attr'}->{$gkey}; - $collation->$gkey( $val ); + my $use_version; + foreach my $gkey ( keys %{$graph_data->{'global'}} ) { + my $val = $graph_data->{'global'}->{$gkey}; + if( $gkey eq 'version' ) { + $use_version = $val; + } else { + $collation->$gkey( $val ); + } } # Add the nodes to the graph. - my $extra_data = {}; # Keep track of data that needs to be processed - # after the nodes & edges are created. - print STDERR "Adding graph nodes\n"; - foreach my $n ( @{$graph_data->{'nodes'}} ) { + # print STDERR "Adding graph nodes\n"; + foreach my $n ( @{$graph_data->{'nodes'}} ) { + # If it is the start or end node, we already have one, so + # grab the rank and go. + next if( defined $n->{$START_KEY} ); + if( defined $n->{$END_KEY} ) { + $collation->end->rank( $n->{$RANK_KEY} ); + next; + } + # First extract the data that we can use without reference to # anything else. - my %node_data = %$n; # Need $n itself untouched for edge processing - my $nodeid = delete $node_data{$IDKEY}; - my $reading = delete $node_data{$TOKENKEY}; - my $class = delete $node_data{$CLASS_KEY} || ''; - my $rank = delete $node_data{$RANK_KEY}; - # Create the node. Current valid classes are common and meta. - # Everything else is a normal reading. - my $gnode = $collation->add_reading( $nodeid ); - $gnode->text( $reading ); - $gnode->make_common if $class eq 'common'; - $gnode->is_meta( 1 ) if $class eq 'meta'; - # This is a horrible hack. - $gnode->is_lacuna( $reading =~ /^\#LACUNA/ ); - $gnode->rank( $rank ) if defined $rank; - - # Now save the data that we need for post-processing, - # if it exists. - if ( keys %node_data ) { - $extra_data->{$nodeid} = \%node_data - } + # Create the node. + my $reading_options = { + 'id' => $n->{$IDKEY}, + 'is_lacuna' => $n->{$LACUNA_KEY}, + 'is_common' => $n->{$COMMON_KEY}, + }; + my $rank = $n->{$RANK_KEY}; + $reading_options->{'rank'} = $rank if $rank; + my $text = $n->{$TOKENKEY}; + $reading_options->{'text'} = $text if $text; + + my $gnode = $collation->add_reading( $reading_options ); } # Now add the edges. - print STDERR "Adding graph edges\n"; + # print STDERR "Adding graph edges\n"; foreach my $e ( @{$graph_data->{'edges'}} ) { my $from = $e->{$SOURCE_KEY}; my $to = $e->{$TARGET_KEY}; - my $class = $e->{$CLASS_KEY}; - - # We may have more information depending on the class. - if( $class eq 'path' ) { - # We need the witness, and whether it is an 'extra' reading path. - my $wit = $e->{$WITNESS_KEY}; - warn "No witness label on path edge!" unless $wit; - my $extra = $e->{$EXTRA_KEY}; - my $label = $wit . ( $extra ? $collation->ac_label : '' ); - $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label ); - # Add the witness if we don't have it already. - unless( $witnesses{$wit} ) { - $tradition->add_witness( sigil => $wit ); - $witnesses{$wit} = 1; - } - $tradition->witness( $wit )->is_layered( 1 ) if $extra; - } elsif( $class eq 'relationship' ) { - # We need the metadata about the relationship. - my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} }; - $opts->{'equal_rank'} = $e->{'equal_rank'} - if exists $e->{'equal_rank'}; - $opts->{'non_correctable'} = $e->{'non_correctable'} - if exists $e->{'non_correctable'}; - $opts->{'non_independent'} = $e->{'non_independent'} - if exists $e->{'non_independent'}; - warn "No relationship type for relationship edge!" unless $opts->{'type'}; - my( $ok, @result ) = $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, $opts ); - unless( $ok ) { - warn "Did not add relationship: @result"; - } - } - } - ## Deal with node information (transposition, relationships, etc.) that - ## needs to be processed after all the nodes are created. - print STDERR "Adding second-pass node data\n"; - foreach my $nkey ( keys %$extra_data ) { - foreach my $edkey ( keys %{$extra_data->{$nkey}} ) { - my $this_reading = $collation->reading( $nkey ); - if( $edkey eq $TRANSPOS_KEY ) { - my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} ); - $this_reading->set_identical( $other_reading ); - } else { - warn "Unfamiliar reading node data $edkey for $nkey"; - } - } + # We need the witness, and whether it is an 'extra' reading path. + my $wit = $e->{$WITNESS_KEY}; + warn "No witness label on path edge!" unless $wit; + my $extra = $e->{$EXTRA_KEY}; + my $label = $wit . ( $extra ? $collation->ac_label : '' ); + $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label ); + # Add the witness if we don't have it already. + unless( $witnesses{$wit} ) { + $tradition->add_witness( sigil => $wit ); + $witnesses{$wit} = 1; + } + $tradition->witness( $wit )->is_layered( 1 ) if $extra; } + + ## Done with the main graph, now look at the relationships. + # Nodes are added via the call to add_reading above. We only need + # add the relationships themselves. + # TODO check that scoping does trt + foreach my $e ( @{$rel_data->{'edges'}} ) { + my $from = $e->{$SOURCE_KEY}; + my $to = $e->{$TARGET_KEY}; + my $relationship_opts = { + 'type' => $e->{$RELATIONSHIP_KEY}, + 'scope' => $e->{$SCOPE_KEY}, + }; + $relationship_opts->{'non_correctable'} = $e->{$CORRECT_KEY} + if exists $e->{$CORRECT_KEY}; + $relationship_opts->{'non_independent'} = $e->{$INDEP_KEY} + if exists $e->{$INDEP_KEY}; + $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, + $relationship_opts ); + } + + # Save the text for each witness so that we can ensure consistency + # later on + $tradition->collation->text_from_paths(); + } 1;