1 package Text::Tradition::Parser::CollateX;
5 use Text::Tradition::Parser::GraphML;
9 Text::Tradition::Parser::CollateX
13 Parser module for Text::Tradition, given a GraphML file from the
14 CollateX program that describes a collation graph. For further
15 information on the GraphML format for text collation, see
16 http://gregor.middell.net/collatex/
24 parse( $tradition, $init_options );
26 Takes an initialized Text::Tradition::Graph object and its initialization
27 options, including the data source; creates the appropriate nodes and edges
33 my $CONTENTKEY = 'token';
34 my $TRANSKEY = 'identical';
37 my( $tradition, $opts ) = @_;
38 my $graph_data = Text::Tradition::Parser::GraphML::parse( $opts );
39 my $collation = $tradition->collation;
40 my %witnesses; # Keep track of the witnesses we encounter as we
41 # run through the graph data.
43 # Add the nodes to the graph. First delete the start node, because
44 # GraphML graphs will have their own start nodes.
45 $collation->del_reading( $collation->start() );
46 $collation->del_reading( $collation->end() );
48 my $extra_data = {}; # Keep track of info to be processed after all
49 # nodes have been created
50 foreach my $n ( @{$graph_data->{'nodes'}} ) {
52 my $nodeid = delete $node_data{$IDKEY};
53 my $token = delete $node_data{$CONTENTKEY};
54 unless( defined $nodeid && defined $token ) {
55 warn "Did not find an ID or token for graph node, can't add it";
58 my $gnode = $collation->add_reading( $nodeid );
59 $gnode->text( $token );
61 # Whatever is left is extra info to be processed later.
62 if( keys %node_data ) {
63 $extra_data->{$nodeid} = \%node_data;
68 foreach my $e ( @{$graph_data->{'edges'}} ) {
70 my $from = delete $edge_data{'source'};
71 my $to = delete $edge_data{'target'};
73 # In CollateX, we have a distinct witness data ID per witness,
74 # so that we can have multiple witnesses per edge. We want to
75 # translate this to one witness per edge in our own
77 foreach my $ekey ( keys %edge_data ) {
78 my $wit = $edge_data{$ekey};
79 # Create the witness object if it does not yet exist.
80 unless( $witnesses{$wit} ) {
81 $tradition->add_witness( 'sigil' => $wit );
84 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $wit );
88 # Process the extra node data if it exists.
89 foreach my $nodeid ( keys %$extra_data ) {
90 my $ed = $extra_data->{$nodeid};
91 if( exists $ed->{$TRANSKEY} ) {
93 my $tn_reading = $collation->reading( $nodeid );
94 my $main_reading = $collation->reading( $ed->{$TRANSKEY} );
95 if( $collation->linear ) {
96 $tn_reading->set_identical( $main_reading );
98 $collation->merge_readings( $main_reading, $tn_reading );
100 } # else we don't have any other tags to process yet.
103 # Find the beginning and end nodes of the graph. The beginning node
104 # has no incoming edges; the end node has no outgoing edges.
105 my( $begin_node, $end_node );
106 foreach my $gnode ( $collation->readings() ) {
107 # print STDERR "Checking node " . $gnode->name . "\n";
108 my @outgoing = $gnode->outgoing();
109 my @incoming = $gnode->incoming();
111 unless( scalar @incoming ) {
112 warn "Already have a beginning node" if $begin_node;
113 $begin_node = $gnode;
114 $collation->start( $gnode );
116 unless( scalar @outgoing ) {
117 warn "Already have an ending node" if $end_node;
119 $collation->end( $gnode );
123 # TODO Need to populate $wit->path / uncorrected_path
125 # Now we have added the witnesses and their paths, so we can
126 # calculate their explicit positions.
127 # TODO CollateX does this, and we should just have it exported there.
128 $collation->calculate_ranks();
135 This package is free software and is provided "as is" without express
136 or implied warranty. You can redistribute it and/or modify it under
137 the same terms as Perl itself.
141 Tara L Andrews, aurum@cpan.org