1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
10 Text::Tradition::Parser::GraphML
16 my $t_from_file = Text::Tradition->new(
19 'file' => '/path/to/tradition.xml'
22 my $t_from_string = Text::Tradition->new(
25 'string' => $tradition_xml,
30 Parser module for Text::Tradition to read in its own GraphML output format.
31 GraphML is a relatively simple graph description language; a 'graph' element
32 can have 'node' and 'edge' elements, and each of these can have simple 'data'
33 elements for attributes to be saved.
35 The graph itself has attributes as in the Collation object:
45 =item * wit_list_separator
49 The node objects have the following attributes:
65 The edge objects have the following attributes:
71 =item * witness (for 'path' class edges)
73 =item * extra (for 'path' class edges)
75 =item * relationship (for 'relationship' class edges)
77 =item * equal_rank (for 'relationship' class edges)
79 =item * non_correctable (for 'relationship' class edges)
81 =item * non_independent (for 'relationship' class edges)
89 parse( $graph, $opts );
91 Takes an initialized Text::Tradition object and a set of options; creates
92 the appropriate nodes and edges on the graph. The options hash should
93 include either a 'file' argument or a 'string' argument, depending on the
94 source of the XML to be parsed.
99 binmode STDOUT, ":utf8";
100 binmode STDERR, ":utf8";
101 eval { no warnings; binmode $DB::OUT, ":utf8"; };
103 my $tradition = 't/data/florilegium_graphml.xml';
104 my $t = Text::Tradition->new(
107 'file' => $tradition,
110 is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" );
112 is( scalar $t->collation->readings, 319, "Collation has all readings" );
113 is( scalar $t->collation->paths, 376, "Collation has all paths" );
114 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
117 # TODO add a relationship, add a stemma, write graphml, reparse it, check that
118 # the new data is there
119 $t->language('Greek');
120 $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
121 $t->collation->add_relationship( 'w12', 'w13',
122 { 'type' => 'grammatical', 'scope' => 'global',
123 'annotation' => 'This is some note' } );
124 ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
125 my $graphml_str = $t->collation->as_graphml;
127 my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
128 is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" );
130 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
131 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
132 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
133 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
134 is( $newt->language, 'Greek', "Tradition has correct language setting" );
135 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
136 ok( $rel, "Found set relationship" );
137 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
138 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
139 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
148 my( $tradition, $opts ) = @_;
150 # Collation data is in the first graph; relationship-specific stuff
152 my( $graph_data, $rel_data ) = graphml_parse( $opts );
154 my $collation = $tradition->collation;
157 # print STDERR "Setting graph globals\n";
158 $tradition->name( $graph_data->{'name'} );
160 my $tmeta = $tradition->meta;
161 my $cmeta = $collation->meta;
162 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
163 my $val = $graph_data->{'global'}->{$gkey};
164 if( $gkey eq 'version' ) {
166 } elsif( $gkey eq 'stemmata' ) { # Special case, yuck
167 foreach my $dotstr ( split( /\n/, $val ) ) {
168 $tradition->add_stemma( 'dot' => $dotstr );
170 } elsif( $tmeta->has_attribute( $gkey ) ) {
171 $tradition->$gkey( $val );
173 $collation->$gkey( $val );
177 # Add the nodes to the graph.
178 # Note any reading IDs that were changed in order to comply with XML
179 # name restrictions; we have to hardcode start & end.
180 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
182 # print STDERR "Adding collation readings\n";
183 foreach my $n ( @{$graph_data->{'nodes'}} ) {
184 # If it is the start or end node, we already have one, so
185 # grab the rank and go.
186 next if( defined $n->{'is_start'} );
187 if( defined $n->{'is_end'} ) {
188 $collation->end->rank( $n->{'rank'} );
191 my $gnode = $collation->add_reading( $n );
192 if( $gnode->id ne $n->{'id'} ) {
193 $namechange{$n->{'id'}} = $gnode->id;
198 # print STDERR "Adding collation path edges\n";
199 foreach my $e ( @{$graph_data->{'edges'}} ) {
200 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
201 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
202 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
203 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
204 my $from = $collation->reading( $sourceid );
205 my $to = $collation->reading( $targetid );
207 warn "No witness label on path edge!" unless $e->{'witness'};
208 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
209 $collation->add_path( $from, $to, $label );
211 # Add the witness if we don't have it already.
212 unless( $witnesses{$e->{'witness'}} ) {
213 $tradition->add_witness(
214 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
215 $witnesses{$e->{'witness'}} = 1;
217 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
220 ## Done with the main graph, now look at the relationships.
221 # Nodes are added via the call to add_reading above. We only need
222 # add the relationships themselves.
223 # TODO check that scoping does trt
224 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
225 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
226 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
227 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
228 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
229 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
230 my $from = $collation->reading( $sourceid );
231 my $to = $collation->reading( $targetid );
232 delete $e->{'source'};
233 delete $e->{'target'};
234 # The remaining keys are relationship attributes.
235 # Backward compatibility...
236 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
237 delete $e->{'class'};
238 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
240 # Add the specified relationship unless we already have done.
242 if( $e->{'scope'} ne 'local' ) {
243 my $relobj = $collation->get_relationship( $from, $to );
244 if( $relobj && $relobj->scope eq $e->{'scope'}
245 && $relobj->type eq $e->{'type'} ) {
250 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
251 } catch( Text::Tradition::Error $e ) {
252 warn "DROPPING $from -> $to: " . $e->message;
256 # Save the text for each witness so that we can ensure consistency
258 $collation->text_from_paths();
261 ## Return the relationship that comes first in priority.
270 my $key = exists $a->{'type'} ? 'type' : 'relationship';
271 my $at = $LAYERS{$a->{$key}} || 99;
272 my $bt = $LAYERS{$b->{$key}} || 99;
282 =item * Make this into a stream parser with GraphML
284 =item * Simply field -> attribute correspondence for nodes and edges
286 =item * Share key name constants with Collation.pm
292 This package is free software and is provided "as is" without express
293 or implied warranty. You can redistribute it and/or modify it under
294 the same terms as Perl itself.
298 Tara L Andrews E<lt>aurum@cpan.orgE<gt>