1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
10 Text::Tradition::Parser::GraphML
16 my $t_from_file = Text::Tradition->new(
19 'file' => '/path/to/tradition.xml'
22 my $t_from_string = Text::Tradition->new(
25 'string' => $tradition_xml,
30 Parser module for Text::Tradition to read in its own GraphML output format.
31 GraphML is a relatively simple graph description language; a 'graph' element
32 can have 'node' and 'edge' elements, and each of these can have simple 'data'
33 elements for attributes to be saved.
35 The graph itself has attributes as in the Collation object:
45 =item * wit_list_separator
49 The node objects have the following attributes:
65 The edge objects have the following attributes:
71 =item * witness (for 'path' class edges)
73 =item * extra (for 'path' class edges)
75 =item * relationship (for 'relationship' class edges)
77 =item * equal_rank (for 'relationship' class edges)
79 =item * non_correctable (for 'relationship' class edges)
81 =item * non_independent (for 'relationship' class edges)
89 parse( $graph, $opts );
91 Takes an initialized Text::Tradition object and a set of options; creates
92 the appropriate nodes and edges on the graph. The options hash should
93 include either a 'file' argument or a 'string' argument, depending on the
94 source of the XML to be parsed.
102 use Text::Tradition::Directory;
104 binmode STDOUT, ":utf8";
105 binmode STDERR, ":utf8";
106 eval { no warnings; binmode $DB::OUT, ":utf8"; };
108 my $tradition = 't/data/florilegium_graphml.xml';
109 my $t = Text::Tradition->new(
112 'file' => $tradition,
115 ok( $t->$_isa('Text::Tradition'), "Parsed GraphML version 2" );
117 is( scalar $t->collation->readings, 319, "Collation has all readings" );
118 is( scalar $t->collation->paths, 376, "Collation has all paths" );
119 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
122 # TODO add a relationship, add a stemma, write graphml, reparse it, check that
123 # the new data is there
124 $t->language('Greek');
125 my $stemma_enabled = $t->can('add_stemma');
126 if( $stemma_enabled ) {
127 $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
129 $t->collation->add_relationship( 'w12', 'w13',
130 { 'type' => 'grammatical', 'scope' => 'global',
131 'annotation' => 'This is some note' } );
132 ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
133 my $graphml_str = $t->collation->as_graphml;
135 my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
136 ok( $newt->$_isa('Text::Tradition'), "Parsed current GraphML version" );
138 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
139 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
140 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
141 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
142 is( $newt->language, 'Greek', "Tradition has correct language setting" );
143 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
144 ok( $rel, "Found set relationship" );
145 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
146 if( $stemma_enabled ) {
147 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
148 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
152 # Test user save / restore
153 my $fh = File::Temp->new();
154 my $file = $fh->filename;
156 my $dsn = "dbi:SQLite:dbname=$file";
157 my $userstore = Text::Tradition::Directory->new( { dsn => $dsn,
158 extra_args => { create => 1 } } );
159 my $scope = $userstore->new_scope();
160 my $testuser = $userstore->create_user( { url => 'http://example.com' } );
161 ok( $testuser->$_isa('Text::Tradition::User'), "Created test user via userstore" );
162 $testuser->add_tradition( $newt );
163 is( $newt->user->id, $testuser->id, "Assigned tradition to test user" );
164 $graphml_str = $newt->collation->as_graphml;
167 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
168 } 'DROPPING user assignment without a specified userstore',
169 "Got expected user drop warning on parse";
170 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str,
171 'userstore' => $userstore );
172 is( $usert->user->id, $testuser->id, "Parsed tradition with userstore points to correct user" );
174 # Test warning if we can
175 unless( $stemma_enabled ) {
178 $nst = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lexformat.xml' );
179 } [qr/DROPPING stemmata/],
180 "Got expected stemma drop warning on parse";
188 my( $tradition, $opts ) = @_;
190 # Collation data is in the first graph; relationship-specific stuff
192 my( $graph_data, $rel_data ) = graphml_parse( $opts );
194 my $collation = $tradition->collation;
197 # print STDERR "Setting graph globals\n";
198 $tradition->name( $graph_data->{'name'} );
201 my $tmeta = $tradition->meta;
202 my $cmeta = $collation->meta;
203 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
204 my $val = $graph_data->{'global'}->{$gkey};
205 if( $gkey eq 'version' ) {
207 } elsif( $gkey eq 'stemmata' ) {
208 # Make sure we can handle stemmata
209 # Parse the stemmata into objects
210 if( $tradition->can('add_stemma') ) {
211 foreach my $dotstr ( split( /\n/, $val ) ) {
212 $tradition->add_stemma( 'dot' => $dotstr );
215 warn "Analysis module not installed; DROPPING stemmata";
217 } elsif( $gkey eq 'user' ) {
218 # Assign the tradition to the user if we can
219 if( exists $opts->{'userstore'} ) {
220 my $userdir = delete $opts->{'userstore'};
221 my $user = $userdir->find_user( { username => $val } );
223 $user->add_tradition( $tradition );
225 warn( "Found no user with ID $val; DROPPING user assignment" );
228 warn( "DROPPING user assignment without a specified userstore" );
230 } elsif( $tmeta->has_attribute( $gkey ) ) {
231 $tradition->$gkey( $val );
233 $collation->$gkey( $val );
237 # Add the nodes to the graph.
238 # Note any reading IDs that were changed in order to comply with XML
239 # name restrictions; we have to hardcode start & end.
240 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
242 # print STDERR "Adding collation readings\n";
243 foreach my $n ( @{$graph_data->{'nodes'}} ) {
244 # If it is the start or end node, we already have one, so
245 # grab the rank and go.
246 if( defined $n->{'is_start'} ) {
247 $collation->start->rank($n->{'rank'});
250 if( defined $n->{'is_end'} ) {
251 $collation->end->rank( $n->{'rank'} );
254 my $gnode = $collation->add_reading( $n );
255 if( $gnode->id ne $n->{'id'} ) {
256 $namechange{$n->{'id'}} = $gnode->id;
261 # print STDERR "Adding collation path edges\n";
262 foreach my $e ( @{$graph_data->{'edges'}} ) {
263 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
264 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
265 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
266 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
267 my $from = $collation->reading( $sourceid );
268 my $to = $collation->reading( $targetid );
270 warn "No witness label on path edge!" unless $e->{'witness'};
271 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
272 $collation->add_path( $from, $to, $label );
274 # Add the witness if we don't have it already.
275 unless( $witnesses{$e->{'witness'}} ) {
276 $tradition->add_witness(
277 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
278 $witnesses{$e->{'witness'}} = 1;
280 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
283 ## Done with the main graph, now look at the relationships.
284 # Nodes are added via the call to add_reading above. We only need
285 # add the relationships themselves.
286 # TODO check that scoping does trt
287 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
288 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
289 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
290 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
291 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
292 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
293 my $from = $collation->reading( $sourceid );
294 my $to = $collation->reading( $targetid );
295 delete $e->{'source'};
296 delete $e->{'target'};
297 # The remaining keys are relationship attributes.
298 # Backward compatibility...
299 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
300 delete $e->{'class'};
301 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
303 # Add the specified relationship unless we already have done.
305 if( $e->{'scope'} ne 'local' ) {
306 my $relobj = $collation->get_relationship( $from, $to );
307 if( $relobj && $relobj->scope eq $e->{'scope'}
308 && $relobj->type eq $e->{'type'} ) {
313 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
314 } catch( Text::Tradition::Error $e ) {
315 warn "DROPPING $from -> $to: " . $e->message;
319 # Save the text for each witness so that we can ensure consistency
321 $collation->text_from_paths();
324 ## Return the relationship that comes first in priority.
333 my $key = exists $a->{'type'} ? 'type' : 'relationship';
334 my $at = $LAYERS{$a->{$key}} || 99;
335 my $bt = $LAYERS{$b->{$key}} || 99;
345 =item * Make this into a stream parser with GraphML
347 =item * Simply field -> attribute correspondence for nodes and edges
349 =item * Share key name constants with Collation.pm
355 This package is free software and is provided "as is" without express
356 or implied warranty. You can redistribute it and/or modify it under
357 the same terms as Perl itself.
361 Tara L Andrews E<lt>aurum@cpan.orgE<gt>