X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FParser%2FSelf.pm;h=addff132551be9fab6ce6847b3c38d0b3e2d90db;hb=94c00c71ffabc3dc155d237364e76af4385dcb96;hp=781a73913fe0aadd60e29f9fdc5a60eced4255b3;hpb=32014ec936b48809e9f2dae8f20a01c887253427;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Parser/Self.pm b/lib/Text/Tradition/Parser/Self.pm index 781a739..addff13 100644 --- a/lib/Text/Tradition/Parser/Self.pm +++ b/lib/Text/Tradition/Parser/Self.pm @@ -27,86 +27,100 @@ graph. =cut -my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $POSITION_KEY ) - = qw/ name reading identical position /; +# TODO share these with Collation.pm somehow +my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY, + $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY ) + = qw/ name reading identical rank class + source target witness extra relationship/; sub parse { my( $tradition, $graphml_str ) = @_; + + # TODO this is begging for stream parsing instead of multiple loops. my $graph_data = Text::Tradition::Parser::GraphML::parse( $graphml_str ); my $collation = $tradition->collation; my %witnesses; # Add the nodes to the graph. + # TODO Are we adding extra start/end nodes? my $extra_data = {}; # Keep track of data that needs to be processed # after the nodes & edges are created. + print STDERR "Adding graph nodes\n"; foreach my $n ( @{$graph_data->{'nodes'}} ) { - # Could use a better way of registering these - my %node_data = %$n; - my $nodeid = delete $node_data{$IDKEY}; - my $reading = delete $node_data{$TOKENKEY}; - my $gnode = $collation->add_reading( $nodeid ); - $gnode->text( $reading ); - - # Now save the rest of the data, i.e. not the ID or label, - # if it exists. - if ( keys %node_data ) { - $extra_data->{$nodeid} = \%node_data; - } + # First extract the data that we can use without reference to + # anything else. + my %node_data = %$n; # Need $n itself untouched for edge processing + my $nodeid = delete $node_data{$IDKEY}; + my $reading = delete $node_data{$TOKENKEY}; + my $class = delete $node_data{$CLASS_KEY} || ''; + my $rank = delete $node_data{$RANK_KEY}; + + # Create the node. Current valid classes are common and meta. + # Everything else is a normal reading. + my $gnode = $collation->add_reading( $nodeid ); + $gnode->text( $reading ); + $gnode->make_common if $class eq 'common'; + $gnode->is_meta( 1 ) if $class eq 'meta'; + $gnode->rank( $rank ) if defined $rank; + + # Now save the data that we need for post-processing, + # if it exists. + if ( keys %node_data ) { + $extra_data->{$nodeid} = \%node_data + } } - + # Now add the edges. + print STDERR "Adding graph edges\n"; + $DB::single = 1; foreach my $e ( @{$graph_data->{'edges'}} ) { - my %edge_data = %$e; - my $from = delete $edge_data{'source'}; - my $to = delete $edge_data{'target'}; - - # Whatever is left tells us what kind of edge it is. - foreach my $wkey ( keys %edge_data ) { - if( $wkey =~ /^witness/ ) { - my $wit = $edge_data{$wkey}; - unless( $witnesses{$wit} ) { - $tradition->add_witness( sigil => $wit ); - $witnesses{$wit} = 1; - } - my $label = $wkey eq 'witness_ante_corr' - ? $wit . $collation->ac_label : $wit; - $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label ); - } else { - my $rel = $edge_data{$wkey}; - # TODO handle global relationships - $collation->add_relationship( $rel, $from->{$IDKEY}, $to->{$IDKEY} ); - } - } + my $from = $e->{$SOURCE_KEY}; + my $to = $e->{$TARGET_KEY}; + my $class = $e->{$CLASS_KEY}; + + # We may have more information depending on the class. + if( $class eq 'path' ) { + # We need the witness, and whether it is an 'extra' reading path. + my $wit = $e->{$WITNESS_KEY}; + warn "No witness label on path edge!" unless $wit; + my $extra = $e->{$EXTRA_KEY}; + my $label = $wit . ( $extra ? $collation->ac_label : '' ); + $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label ); + # Add the witness if we don't have it already. + unless( $witnesses{$wit} ) { + $tradition->add_witness( sigil => $wit ); + $witnesses{$wit} = 1; + } + } elsif( $class eq 'relationship' ) { + # We need the relationship type. + my $rel = $e->{$RELATIONSHIP_KEY}; + warn "No relationship type for relationship edge!" unless $rel; + $collation->add_relationship( $rel, $from->{$IDKEY}, $to->{$IDKEY} ); + } } ## Deal with node information (transposition, relationships, etc.) that ## needs to be processed after all the nodes are created. + print STDERR "Adding second-pass node data\n"; + my $linear = undef; foreach my $nkey ( keys %$extra_data ) { - foreach my $edkey ( keys %{$extra_data->{$nkey}} ) { - my $this_reading = $collation->reading( $nkey ); - if( $edkey eq $TRANSPOS_KEY ) { - my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} ); - if( $collation->linear ) { - $this_reading->set_identical( $other_reading ); - } else { - $collation->merge_readings( $other_reading, $this_reading ); - } - } elsif ( $edkey eq $POSITION_KEY ) { - $this_reading->position( $extra_data->{$nkey}->{$edkey} ); - } else { - warn "Unfamiliar reading node data $edkey for $nkey"; - } - } + foreach my $edkey ( keys %{$extra_data->{$nkey}} ) { + my $this_reading = $collation->reading( $nkey ); + if( $edkey eq $TRANSPOS_KEY ) { + $DB::single = 1; + my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} ); + # We evidently have a linear graph. + $linear = 1; + $this_reading->set_identical( $other_reading ); + } else { + warn "Unfamiliar reading node data $edkey for $nkey"; + } + } } - - # We know what the beginning and ending nodes are, no need to - # search or reset. - my $end_node = $collation->reading( '#END#' ); - $DB::single = 1; - # Walk the paths and make reading sequences for our witnesses. - $collation->walk_witness_paths( $end_node ); + $collation->linear( $linear ); + # TODO We probably need to set the $witness->path arrays for each wit. } =back