1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
10 Text::Tradition::Parser::GraphML
16 my $t_from_file = Text::Tradition->new(
19 'file' => '/path/to/tradition.xml'
22 my $t_from_string = Text::Tradition->new(
25 'string' => $tradition_xml,
30 Parser module for Text::Tradition to read in its own GraphML output format.
31 GraphML is a relatively simple graph description language; a 'graph' element
32 can have 'node' and 'edge' elements, and each of these can have simple 'data'
33 elements for attributes to be saved.
35 The graph itself has attributes as in the Collation object:
45 =item * wit_list_separator
49 The node objects have the following attributes:
65 The edge objects have the following attributes:
71 =item * witness (for 'path' class edges)
73 =item * extra (for 'path' class edges)
75 =item * relationship (for 'relationship' class edges)
77 =item * equal_rank (for 'relationship' class edges)
79 =item * non_correctable (for 'relationship' class edges)
81 =item * non_independent (for 'relationship' class edges)
89 parse( $graph, $opts );
91 Takes an initialized Text::Tradition object and a set of options; creates
92 the appropriate nodes and edges on the graph. The options hash should
93 include either a 'file' argument or a 'string' argument, depending on the
94 source of the XML to be parsed.
102 binmode STDOUT, ":utf8";
103 binmode STDERR, ":utf8";
104 eval { no warnings; binmode $DB::OUT, ":utf8"; };
106 my $tradition = 't/data/florilegium_graphml.xml';
107 my $t = Text::Tradition->new(
110 'file' => $tradition,
113 ok( $t->$_isa('Text::Tradition'), "Parsed GraphML version 2" );
115 is( scalar $t->collation->readings, 319, "Collation has all readings" );
116 is( scalar $t->collation->paths, 376, "Collation has all paths" );
117 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
120 # TODO add a relationship, add a stemma, write graphml, reparse it, check that
121 # the new data is there
122 my $language_enabled = $t->can('language');
123 if( $language_enabled ) {
124 $t->language('Greek');
126 my $stemma_enabled = $t->can('add_stemma');
127 if( $stemma_enabled ) {
128 $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
130 $t->collation->add_relationship( 'w12', 'w13',
131 { 'type' => 'grammatical', 'scope' => 'global',
132 'annotation' => 'This is some note' } );
133 ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
134 my $graphml_str = $t->collation->as_graphml;
136 my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
137 ok( $newt->$_isa('Text::Tradition'), "Parsed current GraphML version" );
139 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
140 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
141 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
142 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
143 if( $language_enabled ) {
144 is( $newt->language, 'Greek', "Tradition has correct language setting" );
146 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
147 ok( $rel, "Found set relationship" );
148 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
149 if( $stemma_enabled ) {
150 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
151 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
155 # Test warning if we can
156 unless( $stemma_enabled ) {
159 $nst = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lexformat.xml' );
160 } [qr/DROPPING stemmata/],
161 "Got expected stemma drop warning on parse";
169 my( $tradition, $opts ) = @_;
171 # Collation data is in the first graph; relationship-specific stuff
173 my( $graph_data, $rel_data ) = graphml_parse( $opts );
175 my $collation = $tradition->collation;
178 # print STDERR "Setting graph globals\n";
179 $tradition->name( $graph_data->{'name'} );
182 my $tmeta = $tradition->meta;
183 my $cmeta = $collation->meta;
184 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
185 my $val = $graph_data->{'global'}->{$gkey};
186 if( $gkey eq 'version' ) {
188 } elsif( $gkey eq 'stemmata' ) {
189 # Make sure we can handle stemmata
190 # Parse the stemmata into objects
191 if( $tradition->can('add_stemma') ) {
192 foreach my $dotstr ( split( /\n/, $val ) ) {
193 $tradition->add_stemma( 'dot' => $dotstr );
196 warn "Analysis module not installed; DROPPING stemmata";
198 } elsif( $gkey eq 'language' ) {
199 if( $tradition->can('language') ) {
200 $tradition->language( $val );
202 warn "Morphology module not installed; DROPPING language";
204 } elsif( $gkey eq 'user' ) {
205 # Assign the tradition to the user if we can
206 if( exists $opts->{'userstore'} ) {
207 my $userdir = delete $opts->{'userstore'};
208 my $user = $userdir->find_user( { username => $val } );
210 $user->add_tradition( $tradition );
212 warn( "Found no user with ID $val; DROPPING user assignment" );
215 warn( "DROPPING user assignment without a specified userstore" );
217 } elsif( $tmeta->has_attribute( $gkey ) ) {
218 $tradition->$gkey( $val );
220 $collation->$gkey( $val );
224 # Add the nodes to the graph.
225 # Note any reading IDs that were changed in order to comply with XML
226 # name restrictions; we have to hardcode start & end.
227 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
229 # print STDERR "Adding collation readings\n";
230 foreach my $n ( @{$graph_data->{'nodes'}} ) {
231 # If it is the start or end node, we already have one, so
232 # grab the rank and go.
233 if( defined $n->{'is_start'} ) {
234 $collation->start->rank($n->{'rank'});
237 if( defined $n->{'is_end'} ) {
238 $collation->end->rank( $n->{'rank'} );
241 my $gnode = $collation->add_reading( $n );
242 if( $gnode->id ne $n->{'id'} ) {
243 $namechange{$n->{'id'}} = $gnode->id;
248 # print STDERR "Adding collation path edges\n";
249 foreach my $e ( @{$graph_data->{'edges'}} ) {
250 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
251 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
252 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
253 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
254 my $from = $collation->reading( $sourceid );
255 my $to = $collation->reading( $targetid );
257 warn "No witness label on path edge!" unless $e->{'witness'};
258 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
259 $collation->add_path( $from, $to, $label );
261 # Add the witness if we don't have it already.
262 unless( $witnesses{$e->{'witness'}} ) {
263 $tradition->add_witness(
264 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
265 $witnesses{$e->{'witness'}} = 1;
267 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
270 ## Done with the main graph, now look at the relationships.
271 # Nodes are added via the call to add_reading above. We only need
272 # add the relationships themselves.
273 # TODO check that scoping does trt
274 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
275 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
276 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
277 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
278 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
279 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
280 my $from = $collation->reading( $sourceid );
281 my $to = $collation->reading( $targetid );
282 delete $e->{'source'};
283 delete $e->{'target'};
284 # The remaining keys are relationship attributes.
285 # Backward compatibility...
286 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
287 delete $e->{'class'};
288 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
290 # Add the specified relationship unless we already have done.
292 if( $e->{'scope'} ne 'local' ) {
293 my $relobj = $collation->get_relationship( $from, $to );
294 if( $relobj && $relobj->scope eq $e->{'scope'}
295 && $relobj->type eq $e->{'type'} ) {
300 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
301 } catch( Text::Tradition::Error $e ) {
302 warn "DROPPING $from -> $to: " . $e->message;
306 # Save the text for each witness so that we can ensure consistency
308 $collation->text_from_paths();
311 ## Return the relationship that comes first in priority.
320 my $key = exists $a->{'type'} ? 'type' : 'relationship';
321 my $at = $LAYERS{$a->{$key}} || 99;
322 my $bt = $LAYERS{$b->{$key}} || 99;
332 =item * Make this into a stream parser with GraphML
334 =item * Simply field -> attribute correspondence for nodes and edges
336 =item * Share key name constants with Collation.pm
342 This package is free software and is provided "as is" without express
343 or implied warranty. You can redistribute it and/or modify it under
344 the same terms as Perl itself.
348 Tara L Andrews E<lt>aurum@cpan.orgE<gt>