1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
10 Text::Tradition::Parser::GraphML
16 my $t_from_file = Text::Tradition->new(
19 'file' => '/path/to/tradition.xml'
22 my $t_from_string = Text::Tradition->new(
25 'string' => $tradition_xml,
30 Parser module for Text::Tradition to read in its own GraphML output format.
31 GraphML is a relatively simple graph description language; a 'graph' element
32 can have 'node' and 'edge' elements, and each of these can have simple 'data'
33 elements for attributes to be saved.
35 The graph itself has attributes as in the Collation object:
45 =item * wit_list_separator
49 The node objects have the following attributes:
65 The edge objects have the following attributes:
71 =item * witness (for 'path' class edges)
73 =item * extra (for 'path' class edges)
75 =item * relationship (for 'relationship' class edges)
77 =item * equal_rank (for 'relationship' class edges)
79 =item * non_correctable (for 'relationship' class edges)
81 =item * non_independent (for 'relationship' class edges)
89 parse( $graph, $opts );
91 Takes an initialized Text::Tradition object and a set of options; creates
92 the appropriate nodes and edges on the graph. The options hash should
93 include either a 'file' argument or a 'string' argument, depending on the
94 source of the XML to be parsed.
102 binmode STDOUT, ":utf8";
103 binmode STDERR, ":utf8";
104 eval { no warnings; binmode $DB::OUT, ":utf8"; };
106 my $tradition = 't/data/florilegium_graphml.xml';
107 my $t = Text::Tradition->new(
110 'file' => $tradition,
113 ok( $t->$_isa('Text::Tradition'), "Parsed GraphML version 2" );
115 is( scalar $t->collation->readings, 319, "Collation has all readings" );
116 is( scalar $t->collation->paths, 376, "Collation has all paths" );
117 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
120 # TODO add a relationship, add a stemma, write graphml, reparse it, check that
121 # the new data is there
122 my $language_enabled = $t->can('language');
123 if( $language_enabled ) {
124 $t->language('Greek');
126 my $stemma_enabled = $t->can('add_stemma');
127 if( $stemma_enabled ) {
128 $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
130 $t->collation->add_relationship( 'w12', 'w13',
131 { 'type' => 'grammatical', 'scope' => 'global',
132 'annotation' => 'This is some note' } );
133 ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
134 my $graphml_str = $t->collation->as_graphml;
136 my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
137 ok( $newt->$_isa('Text::Tradition'), "Parsed current GraphML version" );
139 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
140 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
141 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
142 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
143 if( $language_enabled ) {
144 is( $newt->language, 'Greek', "Tradition has correct language setting" );
146 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
147 ok( $rel, "Found set relationship" );
148 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
149 if( $stemma_enabled ) {
150 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
151 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
155 # Test warning if we can
156 unless( $stemma_enabled ) {
159 $nst = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lexformat.xml' );
160 } [qr/DROPPING stemmata/],
161 "Got expected stemma drop warning on parse";
169 my( $tradition, $opts ) = @_;
171 # Collation data is in the first graph; relationship-specific stuff
173 my( $graph_data, $rel_data ) = graphml_parse( $opts );
175 my $collation = $tradition->collation;
178 # print STDERR "Setting graph globals\n";
179 $tradition->name( $graph_data->{'name'} );
182 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
183 my $val = $graph_data->{'global'}->{$gkey};
184 if( $gkey eq 'version' ) {
186 } elsif( $gkey eq 'stemmata' ) {
187 # Make sure we can handle stemmata
188 # Parse the stemmata into objects
189 if( $tradition->can('add_stemma') ) {
190 foreach my $dotstr ( split( /\n/, $val ) ) {
191 $tradition->add_stemma( 'dot' => $dotstr );
194 warn "Analysis module not installed; DROPPING stemmata";
196 } elsif( $gkey eq 'user' ) {
197 # Assign the tradition to the user if we can
198 if( exists $opts->{'userstore'} ) {
199 my $userdir = delete $opts->{'userstore'};
200 my $user = $userdir->find_user( { username => $val } );
202 $user->add_tradition( $tradition );
204 warn( "Found no user with ID $val; DROPPING user assignment" );
207 warn( "DROPPING user assignment without a specified userstore" );
209 } elsif( $tradition->can( $gkey ) ) {
210 $tradition->$gkey( $val );
211 } elsif( $collation->can( $gkey ) ) {
212 $collation->$gkey( $val );
214 warn( "DROPPING unsupported attribute $gkey" );
218 # Add the nodes to the graph.
219 # Note any reading IDs that were changed in order to comply with XML
220 # name restrictions; we have to hardcode start & end.
221 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
223 # print STDERR "Adding collation readings\n";
224 foreach my $n ( @{$graph_data->{'nodes'}} ) {
225 # If it is the start or end node, we already have one, so
226 # grab the rank and go.
227 if( defined $n->{'is_start'} ) {
228 $collation->start->rank($n->{'rank'});
231 if( defined $n->{'is_end'} ) {
232 $collation->end->rank( $n->{'rank'} );
235 my $gnode = $collation->add_reading( $n );
236 if( $gnode->id ne $n->{'id'} ) {
237 $namechange{$n->{'id'}} = $gnode->id;
242 # print STDERR "Adding collation path edges\n";
243 foreach my $e ( @{$graph_data->{'edges'}} ) {
244 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
245 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
246 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
247 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
248 my $from = $collation->reading( $sourceid );
249 my $to = $collation->reading( $targetid );
251 warn "No witness label on path edge!" unless $e->{'witness'};
252 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
253 $collation->add_path( $from, $to, $label );
255 # Add the witness if we don't have it already.
256 unless( $witnesses{$e->{'witness'}} ) {
257 $tradition->add_witness(
258 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
259 $witnesses{$e->{'witness'}} = 1;
261 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
264 ## Done with the main graph, now look at the relationships.
265 # Nodes are added via the call to add_reading above. We only need
266 # add the relationships themselves.
267 # TODO check that scoping does trt
268 $tradition->_init_done( 1 ); # so that relationships get validated
269 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
270 # Backward compatibility...
271 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
272 foreach my $e ( @{$rel_data->{'edges'}} ) {
273 delete $e->{'class'};
274 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
278 my $rg = $collation->relations;
279 foreach my $e ( sort { _apply_relationship_order( $a, $b, $rg ) }
280 @{$rel_data->{'edges'}} ) {
281 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
282 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
283 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
284 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
285 my $from = $collation->reading( $sourceid );
286 my $to = $collation->reading( $targetid );
287 delete $e->{'source'};
288 delete $e->{'target'};
289 # The remaining keys are relationship attributes.
290 # Add the specified relationship unless we already have done.
292 if( $e->{'scope'} ne 'local' ) {
293 my $relobj = $collation->get_relationship( $from, $to );
294 if( $relobj && $relobj->scope eq $e->{'scope'}
295 && $relobj->type eq $e->{'type'} ) {
298 # Don't propagate the relationship; all the propagations are
299 # already in the XML.
300 $e->{'thispaironly'} = 1;
304 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
305 } catch( Text::Tradition::Error $err ) {
306 warn "DROPPING " . $e->{type} . " rel on $from -> $to: " . $err->message;
310 # Save the text for each witness so that we can ensure consistency
312 $collation->text_from_paths();
315 # Helper sort function for applying the saved relationships in a
317 sub _apply_relationship_order {
318 my( $a, $b, $rg ) = @_;
319 my $at = $rg->type( $a->{type} ); my $bt = $rg->type( $b->{type} );
320 # Apply strong relationships before weak
321 return -1 if $bt->is_weak && !$at->is_weak;
322 return 1 if $at->is_weak && !$bt->is_weak;
323 # Apply more tightly bound relationships first
324 my $blcmp = $at->bindlevel <=> $bt->bindlevel;
325 return $blcmp if $blcmp;
326 # Apply local before global
327 return -1 if $a->{scope} eq 'local' && $b->{scope} ne 'local';
328 return 1 if $b->{scope} eq 'local' && $a->{scope} ne 'local';
337 =item * Make this into a stream parser with GraphML
343 This package is free software and is provided "as is" without express
344 or implied warranty. You can redistribute it and/or modify it under
345 the same terms as Perl itself.
349 Tara L Andrews E<lt>aurum@cpan.orgE<gt>