1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse populate_witness_path /;
9 Text::Tradition::Parser::GraphML
15 my $t_from_file = Text::Tradition->new(
18 'file' => '/path/to/tradition.xml'
21 my $t_from_string = Text::Tradition->new(
24 'string' => $tradition_xml,
29 Parser module for Text::Tradition to read in its own GraphML output format.
30 GraphML is a relatively simple graph description language; a 'graph' element
31 can have 'node' and 'edge' elements, and each of these can have simple 'data'
32 elements for attributes to be saved.
34 The graph itself has attributes as in the Collation object:
44 =item * wit_list_separator
48 The node objects have the following attributes:
64 The edge objects have the following attributes:
70 =item * witness (for 'path' class edges)
72 =item * extra (for 'path' class edges)
74 =item * relationship (for 'relationship' class edges)
76 =item * equal_rank (for 'relationship' class edges)
78 =item * non_correctable (for 'relationship' class edges)
80 =item * non_independent (for 'relationship' class edges)
88 parse( $graph, $opts );
90 Takes an initialized Text::Tradition object and a set of options; creates
91 the appropriate nodes and edges on the graph. The options hash should
92 include either a 'file' argument or a 'string' argument, depending on the
93 source of the XML to be parsed.
98 binmode STDOUT, ":utf8";
99 binmode STDERR, ":utf8";
100 eval { no warnings; binmode $DB::OUT, ":utf8"; };
102 my $tradition = 't/data/florilegium_graphml.xml';
103 my $t = Text::Tradition->new(
106 'file' => $tradition,
109 is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
111 is( scalar $t->collation->readings, 319, "Collation has all readings" );
112 is( scalar $t->collation->paths, 2854, "Collation has all paths" );
113 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
120 my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY,
121 $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY )
122 = qw/ name reading identical rank class
123 source target witness extra relationship/;
126 my( $tradition, $opts ) = @_;
127 my $graph_data = graphml_parse( $opts );
129 my $collation = $tradition->collation;
132 # Set up the graph-global attributes. They will appear in the
133 # hash under their accessor names.
134 print STDERR "Setting graph globals\n";
135 $tradition->name( $graph_data->{'name'} );
136 foreach my $gkey ( keys %{$graph_data->{'attr'}} ) {
137 my $val = $graph_data->{'attr'}->{$gkey};
138 $collation->$gkey( $val );
141 # Add the nodes to the graph.
143 my $extra_data = {}; # Keep track of data that needs to be processed
144 # after the nodes & edges are created.
145 print STDERR "Adding graph nodes\n";
146 foreach my $n ( @{$graph_data->{'nodes'}} ) {
147 # First extract the data that we can use without reference to
149 my %node_data = %$n; # Need $n itself untouched for edge processing
150 my $nodeid = delete $node_data{$IDKEY};
151 my $reading = delete $node_data{$TOKENKEY};
152 my $class = delete $node_data{$CLASS_KEY} || '';
153 my $rank = delete $node_data{$RANK_KEY};
155 # Create the node. Current valid classes are common and meta.
156 # Everything else is a normal reading.
157 my $gnode = $collation->add_reading( $nodeid );
158 $gnode->text( $reading );
159 $gnode->make_common if $class eq 'common';
160 $gnode->is_meta( 1 ) if $class eq 'meta';
161 # This is a horrible hack.
162 $gnode->is_lacuna( $reading =~ /^\#LACUNA/ );
163 $gnode->rank( $rank ) if defined $rank;
165 # Now save the data that we need for post-processing,
167 if ( keys %node_data ) {
168 $extra_data->{$nodeid} = \%node_data
173 print STDERR "Adding graph edges\n";
174 my $has_ante_corr = {};
175 foreach my $e ( @{$graph_data->{'edges'}} ) {
176 my $from = $e->{$SOURCE_KEY};
177 my $to = $e->{$TARGET_KEY};
178 my $class = $e->{$CLASS_KEY};
180 # We may have more information depending on the class.
181 if( $class eq 'path' ) {
182 # We need the witness, and whether it is an 'extra' reading path.
183 my $wit = $e->{$WITNESS_KEY};
184 warn "No witness label on path edge!" unless $wit;
185 my $extra = $e->{$EXTRA_KEY};
186 my $label = $wit . ( $extra ? $collation->ac_label : '' );
187 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
188 # Add the witness if we don't have it already.
189 unless( $witnesses{$wit} ) {
190 $tradition->add_witness( sigil => $wit );
191 $witnesses{$wit} = 1;
193 $has_ante_corr->{$wit} = 1 if $extra;
194 } elsif( $class eq 'relationship' ) {
195 # We need the metadata about the relationship.
196 my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} };
197 $opts->{'equal_rank'} = $e->{'equal_rank'}
198 if exists $e->{'equal_rank'};
199 $opts->{'non_correctable'} = $e->{'non_correctable'}
200 if exists $e->{'non_correctable'};
201 $opts->{'non_independent'} = $e->{'non_independent'}
202 if exists $e->{'non_independent'};
203 warn "No relationship type for relationship edge!" unless $opts->{'type'};
204 my( $ok, @result ) = $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, $opts );
206 warn "Did not add relationship: @result";
211 ## Deal with node information (transposition, relationships, etc.) that
212 ## needs to be processed after all the nodes are created.
213 print STDERR "Adding second-pass node data\n";
214 foreach my $nkey ( keys %$extra_data ) {
215 foreach my $edkey ( keys %{$extra_data->{$nkey}} ) {
216 my $this_reading = $collation->reading( $nkey );
217 if( $edkey eq $TRANSPOS_KEY ) {
218 my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} );
219 $this_reading->set_identical( $other_reading );
221 warn "Unfamiliar reading node data $edkey for $nkey";
226 # Set the $witness->path arrays for each wit.
227 populate_witness_path( $tradition, $has_ante_corr );
236 =item * Make this into a stream parser with GraphML
238 =item * Simply field -> attribute correspondence for nodes and edges
240 =item * Share key name constants with Collation.pm
246 This package is free software and is provided "as is" without express
247 or implied warranty. You can redistribute it and/or modify it under
248 the same terms as Perl itself.
252 Tara L Andrews E<lt>aurum@cpan.orgE<gt>