1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
9 Text::Tradition::Parser::GraphML
15 my $t_from_file = Text::Tradition->new(
18 'file' => '/path/to/tradition.xml'
21 my $t_from_string = Text::Tradition->new(
24 'string' => $tradition_xml,
29 Parser module for Text::Tradition to read in its own GraphML output format.
30 GraphML is a relatively simple graph description language; a 'graph' element
31 can have 'node' and 'edge' elements, and each of these can have simple 'data'
32 elements for attributes to be saved.
34 The graph itself has attributes as in the Collation object:
44 =item * wit_list_separator
48 The node objects have the following attributes:
64 The edge objects have the following attributes:
70 =item * witness (for 'path' class edges)
72 =item * extra (for 'path' class edges)
74 =item * relationship (for 'relationship' class edges)
76 =item * equal_rank (for 'relationship' class edges)
78 =item * non_correctable (for 'relationship' class edges)
80 =item * non_independent (for 'relationship' class edges)
88 parse( $graph, $opts );
90 Takes an initialized Text::Tradition object and a set of options; creates
91 the appropriate nodes and edges on the graph. The options hash should
92 include either a 'file' argument or a 'string' argument, depending on the
93 source of the XML to be parsed.
98 binmode STDOUT, ":utf8";
99 binmode STDERR, ":utf8";
100 eval { no warnings; binmode $DB::OUT, ":utf8"; };
102 my $tradition = 't/data/florilegium_graphml.xml';
103 my $t = Text::Tradition->new(
106 'file' => $tradition,
109 is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
111 is( scalar $t->collation->readings, 319, "Collation has all readings" );
112 is( scalar $t->collation->paths, 376, "Collation has all paths" );
113 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
120 my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY,
121 $START_KEY, $END_KEY, $LACUNA_KEY,
122 $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY,
123 $COLO_KEY, $CORRECT_KEY, $INDEP_KEY )
124 = qw/ name reading identical rank class
125 is_start is_end is_lacuna
126 source target witness extra relationship
127 equal_rank non_correctable non_independent /;
130 my( $tradition, $opts ) = @_;
131 my $graph_data = graphml_parse( $opts );
133 my $collation = $tradition->collation;
136 # Set up the graph-global attributes. They will appear in the
137 # hash under their accessor names.
139 # print STDERR "Setting graph globals\n";
140 $tradition->name( $graph_data->{'name'} );
141 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
142 my $val = $graph_data->{'global'}->{$gkey};
143 if( $gkey eq 'version' ) {
146 $collation->$gkey( $val );
150 # Many of our tags changed.
153 $COLO_KEY = 'colocated';
156 # Add the nodes to the graph.
158 my $extra_data = {}; # Keep track of data that needs to be processed
159 # after the nodes & edges are created.
160 # print STDERR "Adding graph nodes\n";
161 foreach my $n ( @{$graph_data->{'nodes'}} ) {
162 unless( $use_version ) {
164 $n->{$START_KEY} = 1 if $n->{$IDKEY} eq '#START#';
165 $n->{$END_KEY} = 1 if $n->{$IDKEY} eq '#END#';
168 # If it is the start or end node, we already have one, so
169 # grab the rank and go.
170 next if( defined $n->{$START_KEY} );
171 if( defined $n->{$END_KEY} ) {
172 $collation->end->rank( $n->{$RANK_KEY} );
176 # First extract the data that we can use without reference to
178 my %node_data = %$n; # Need $n itself untouched for edge processing
181 my $reading_options = {
182 'id' => delete $node_data{$IDKEY},
183 'is_lacuna' => delete $node_data{$LACUNA_KEY},
185 my $rank = delete $node_data{$RANK_KEY};
186 $reading_options->{'rank'} = $rank if $rank;
187 my $text = delete $node_data{$TOKENKEY};
188 $reading_options->{'text'} = $text if $text;
190 # This is a horrible hack for backwards compatibility.
191 unless( $use_version ) {
192 $reading_options->{'is_lacuna'} = 1
193 if $reading_options->{'text'} =~ /^\#LACUNA/;
196 delete $node_data{$CLASS_KEY}; # Not actually used
197 my $gnode = $collation->add_reading( $reading_options );
199 # Now save the data that we need for post-processing,
200 # if it exists. TODO this is unneeded after conversion
201 if ( keys %node_data ) {
202 $extra_data->{$gnode->id} = \%node_data
207 # print STDERR "Adding graph edges\n";
208 foreach my $e ( @{$graph_data->{'edges'}} ) {
209 my $from = $e->{$SOURCE_KEY};
210 my $to = $e->{$TARGET_KEY};
211 my $class = $e->{$CLASS_KEY};
213 # We may have more information depending on the class.
214 if( $class eq 'path' ) {
215 # We need the witness, and whether it is an 'extra' reading path.
216 my $wit = $e->{$WITNESS_KEY};
217 warn "No witness label on path edge!" unless $wit;
218 my $extra = $e->{$EXTRA_KEY};
219 my $label = $wit . ( $extra ? $collation->ac_label : '' );
220 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
221 # Add the witness if we don't have it already.
222 unless( $witnesses{$wit} ) {
223 $tradition->add_witness( sigil => $wit );
224 $witnesses{$wit} = 1;
226 $tradition->witness( $wit )->is_layered( 1 ) if $extra;
227 } elsif( $class eq 'relationship' ) {
228 # We need the metadata about the relationship.
229 my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} };
230 $opts->{$COLO_KEY} = $e->{$COLO_KEY}
231 if exists $e->{$COLO_KEY};
232 $opts->{$CORRECT_KEY} = $e->{$CORRECT_KEY}
233 if exists $e->{$CORRECT_KEY};
234 $opts->{$INDEP_KEY} = $e->{$INDEP_KEY}
235 if exists $e->{$INDEP_KEY};
236 warn "No relationship type for relationship edge!" unless $opts->{'type'};
237 my( $ok, @result ) = $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, $opts );
239 my $relinfo = $opts->{'type'} . ' '
240 . join( ' -> ', $from->{$IDKEY}, $to->{$IDKEY} );
241 warn "Did not add relationship $relinfo: @result";
246 ## Deal with node information (transposition, relationships, etc.) that
247 ## needs to be processed after all the nodes are created.
248 ## TODO unneeded after conversion
249 unless( $use_version ) {
250 # print STDERR "Adding second-pass node data\n";
251 foreach my $nkey ( keys %$extra_data ) {
252 foreach my $edkey ( keys %{$extra_data->{$nkey}} ) {
253 my $this_reading = $collation->reading( $nkey );
254 if( $edkey eq $TRANSPOS_KEY ) {
255 my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} );
256 $this_reading->set_identical( $other_reading );
258 warn "Unfamiliar reading node data $edkey for $nkey";
271 =item * Make this into a stream parser with GraphML
273 =item * Simply field -> attribute correspondence for nodes and edges
275 =item * Share key name constants with Collation.pm
281 This package is free software and is provided "as is" without express
282 or implied warranty. You can redistribute it and/or modify it under
283 the same terms as Perl itself.
287 Tara L Andrews E<lt>aurum@cpan.orgE<gt>