1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
10 Text::Tradition::Parser::GraphML
16 my $t_from_file = Text::Tradition->new(
19 'file' => '/path/to/tradition.xml'
22 my $t_from_string = Text::Tradition->new(
25 'string' => $tradition_xml,
30 Parser module for Text::Tradition to read in its own GraphML output format.
31 GraphML is a relatively simple graph description language; a 'graph' element
32 can have 'node' and 'edge' elements, and each of these can have simple 'data'
33 elements for attributes to be saved.
35 The graph itself has attributes as in the Collation object:
45 =item * wit_list_separator
49 The node objects have the following attributes:
65 The edge objects have the following attributes:
71 =item * witness (for 'path' class edges)
73 =item * extra (for 'path' class edges)
75 =item * relationship (for 'relationship' class edges)
77 =item * equal_rank (for 'relationship' class edges)
79 =item * non_correctable (for 'relationship' class edges)
81 =item * non_independent (for 'relationship' class edges)
89 parse( $graph, $opts );
91 Takes an initialized Text::Tradition object and a set of options; creates
92 the appropriate nodes and edges on the graph. The options hash should
93 include either a 'file' argument or a 'string' argument, depending on the
94 source of the XML to be parsed.
99 binmode STDOUT, ":utf8";
100 binmode STDERR, ":utf8";
101 eval { no warnings; binmode $DB::OUT, ":utf8"; };
103 my $tradition = 't/data/florilegium_graphml.xml';
104 my $t = Text::Tradition->new(
107 'file' => $tradition,
110 is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" );
112 is( scalar $t->collation->readings, 319, "Collation has all readings" );
113 is( scalar $t->collation->paths, 376, "Collation has all paths" );
114 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
117 # TODO add a relationship, write graphml, reparse it, check that the rel
119 $t->language('Greek');
120 $t->collation->add_relationship( 'w12', 'w13',
121 { 'type' => 'grammatical', 'scope' => 'global',
122 'annotation' => 'This is some note' } );
123 ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
124 my $graphml_str = $t->collation->as_graphml;
126 my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
127 is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" );
129 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
130 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
131 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
132 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
133 is( $newt->language, 'Greek', "Tradition has correct language setting" );
134 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
135 ok( $rel, "Found set relationship" );
136 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
145 my( $tradition, $opts ) = @_;
147 # Collation data is in the first graph; relationship-specific stuff
149 my( $graph_data, $rel_data ) = graphml_parse( $opts );
151 my $collation = $tradition->collation;
154 # print STDERR "Setting graph globals\n";
155 $tradition->name( $graph_data->{'name'} );
157 my $tmeta = $tradition->meta;
158 my $cmeta = $collation->meta;
159 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
160 my $val = $graph_data->{'global'}->{$gkey};
161 if( $gkey eq 'version' ) {
163 } elsif( $tmeta->has_attribute( $gkey ) ) {
164 $tradition->$gkey( $val );
166 $collation->$gkey( $val );
170 # Add the nodes to the graph.
171 # Note any reading IDs that were changed in order to comply with XML
172 # name restrictions; we have to hardcode start & end.
173 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
175 # print STDERR "Adding collation readings\n";
176 foreach my $n ( @{$graph_data->{'nodes'}} ) {
177 # If it is the start or end node, we already have one, so
178 # grab the rank and go.
179 next if( defined $n->{'is_start'} );
180 if( defined $n->{'is_end'} ) {
181 $collation->end->rank( $n->{'rank'} );
184 my $gnode = $collation->add_reading( $n );
185 if( $gnode->id ne $n->{'id'} ) {
186 $namechange{$n->{'id'}} = $gnode->id;
191 # print STDERR "Adding collation path edges\n";
192 foreach my $e ( @{$graph_data->{'edges'}} ) {
193 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
194 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
195 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
196 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
197 my $from = $collation->reading( $sourceid );
198 my $to = $collation->reading( $targetid );
200 warn "No witness label on path edge!" unless $e->{'witness'};
201 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
202 $collation->add_path( $from, $to, $label );
204 # Add the witness if we don't have it already.
205 unless( $witnesses{$e->{'witness'}} ) {
206 $tradition->add_witness(
207 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
208 $witnesses{$e->{'witness'}} = 1;
210 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
213 ## Done with the main graph, now look at the relationships.
214 # Nodes are added via the call to add_reading above. We only need
215 # add the relationships themselves.
216 # TODO check that scoping does trt
217 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
218 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
219 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
220 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
221 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
222 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
223 my $from = $collation->reading( $sourceid );
224 my $to = $collation->reading( $targetid );
225 delete $e->{'source'};
226 delete $e->{'target'};
227 # The remaining keys are relationship attributes.
228 # Backward compatibility...
229 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
230 delete $e->{'class'};
231 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
233 # Add the specified relationship unless we already have done.
235 if( $e->{'scope'} ne 'local' ) {
236 my $relobj = $collation->get_relationship( $from, $to );
237 if( $relobj && $relobj->scope eq $e->{'scope'}
238 && $relobj->type eq $e->{'type'} ) {
243 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
244 } catch( Text::Tradition::Error $e ) {
245 warn "DROPPING $from -> $to: " . $e->message;
249 # Save the text for each witness so that we can ensure consistency
251 $collation->text_from_paths();
254 ## Return the relationship that comes first in priority.
263 my $key = exists $a->{'type'} ? 'type' : 'relationship';
264 my $at = $LAYERS{$a->{$key}} || 99;
265 my $bt = $LAYERS{$b->{$key}} || 99;
275 =item * Make this into a stream parser with GraphML
277 =item * Simply field -> attribute correspondence for nodes and edges
279 =item * Share key name constants with Collation.pm
285 This package is free software and is provided "as is" without express
286 or implied warranty. You can redistribute it and/or modify it under
287 the same terms as Perl itself.
291 Tara L Andrews E<lt>aurum@cpan.orgE<gt>