1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
6 use Text::Tradition::UserStore;
11 Text::Tradition::Parser::GraphML
17 my $t_from_file = Text::Tradition->new(
20 'file' => '/path/to/tradition.xml'
23 my $t_from_string = Text::Tradition->new(
26 'string' => $tradition_xml,
31 Parser module for Text::Tradition to read in its own GraphML output format.
32 GraphML is a relatively simple graph description language; a 'graph' element
33 can have 'node' and 'edge' elements, and each of these can have simple 'data'
34 elements for attributes to be saved.
36 The graph itself has attributes as in the Collation object:
46 =item * wit_list_separator
50 The node objects have the following attributes:
66 The edge objects have the following attributes:
72 =item * witness (for 'path' class edges)
74 =item * extra (for 'path' class edges)
76 =item * relationship (for 'relationship' class edges)
78 =item * equal_rank (for 'relationship' class edges)
80 =item * non_correctable (for 'relationship' class edges)
82 =item * non_independent (for 'relationship' class edges)
90 parse( $graph, $opts );
92 Takes an initialized Text::Tradition object and a set of options; creates
93 the appropriate nodes and edges on the graph. The options hash should
94 include either a 'file' argument or a 'string' argument, depending on the
95 source of the XML to be parsed.
104 binmode STDOUT, ":utf8";
105 binmode STDERR, ":utf8";
106 eval { no warnings; binmode $DB::OUT, ":utf8"; };
108 my $tradition = 't/data/florilegium_graphml.xml';
109 my $t = Text::Tradition->new(
112 'file' => $tradition,
115 ok( $t->$_isa('Text::Tradition'), "Parsed GraphML version 2" );
117 is( scalar $t->collation->readings, 319, "Collation has all readings" );
118 is( scalar $t->collation->paths, 376, "Collation has all paths" );
119 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
122 # TODO add a relationship, add a stemma, write graphml, reparse it, check that
123 # the new data is there
124 $t->language('Greek');
127 $stemma_enabled = $t->enable_stemmata;
129 ok( 1, "Skipping stemma tests without Analysis module" );
131 if( $stemma_enabled ) {
132 $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
134 $t->collation->add_relationship( 'w12', 'w13',
135 { 'type' => 'grammatical', 'scope' => 'global',
136 'annotation' => 'This is some note' } );
137 ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
138 my $graphml_str = $t->collation->as_graphml;
140 my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
141 ok( $newt->$_isa('Text::Tradition'), "Parsed current GraphML version" );
143 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
144 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
145 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
146 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
147 is( $newt->language, 'Greek', "Tradition has correct language setting" );
148 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
149 ok( $rel, "Found set relationship" );
150 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
151 if( $stemma_enabled ) {
152 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
153 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
157 # Test user save / restore
158 my $fh = File::Temp->new();
159 my $file = $fh->filename;
161 my $dsn = "dbi:SQLite:dbname=$file";
162 my $userstore = Text::Tradition::Directory->new( { dsn => $dsn,
163 extra_args => { create => 1 } } );
164 my $scope = $userstore->new_scope();
165 my $testuser = $userstore->create_user( { url => 'http://example.com' } );
166 ok( $testuser->$_isa('Text::Tradition::User'), "Created test user via userstore" );
167 $testuser->add_tradition( $newt );
168 is( $newt->user->id, $testuser->id, "Assigned tradition to test user" );
169 $graphml_str = $newt->collation->as_graphml;
172 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
173 } 'DROPPING user assignment without a specified userstore',
174 "Got expected user drop warning on parse";
175 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str,
176 'userstore' => $userstore );
177 is( $usert->user->id, $testuser->id, "Parsed tradition with userstore points to correct user" );
179 # Test warning if we can
180 unless( $stemma_enabled ) {
183 $nst = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lexformat.xml' );
184 } [qr/DROPPING stemmata/],
185 "Got expected stemma drop warning on parse";
193 my( $tradition, $opts ) = @_;
195 # Collation data is in the first graph; relationship-specific stuff
197 my( $graph_data, $rel_data ) = graphml_parse( $opts );
199 my $collation = $tradition->collation;
202 # print STDERR "Setting graph globals\n";
203 $tradition->name( $graph_data->{'name'} );
206 my $tmeta = $tradition->meta;
207 my $cmeta = $collation->meta;
208 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
209 my $val = $graph_data->{'global'}->{$gkey};
210 if( $gkey eq 'version' ) {
212 } elsif( $gkey eq 'stemmata' ) {
213 # Make sure we can handle stemmata
216 $stemma_enabled = $tradition->enable_stemmata;
218 warn "Analysis module not installed; DROPPING stemmata";
220 # Parse the stemmata into objects
221 if( $stemma_enabled ) {
222 foreach my $dotstr ( split( /\n/, $val ) ) {
223 $tradition->add_stemma( 'dot' => $dotstr );
226 } elsif( $gkey eq 'user' ) {
227 # Assign the tradition to the user if we can
228 if( exists $opts->{'userstore'} ) {
229 my $userdir = delete $opts->{'userstore'};
230 my $user = $userdir->find_user( { username => $val } );
232 $user->add_tradition( $tradition );
234 warn( "Found no user with ID $val; DROPPING user assignment" );
237 warn( "DROPPING user assignment without a specified userstore" );
239 } elsif( $tmeta->has_attribute( $gkey ) ) {
240 $tradition->$gkey( $val );
242 $collation->$gkey( $val );
246 # Add the nodes to the graph.
247 # Note any reading IDs that were changed in order to comply with XML
248 # name restrictions; we have to hardcode start & end.
249 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
251 # print STDERR "Adding collation readings\n";
252 foreach my $n ( @{$graph_data->{'nodes'}} ) {
253 # If it is the start or end node, we already have one, so
254 # grab the rank and go.
255 if( defined $n->{'is_start'} ) {
256 # warn Data::Dump::dump($n);
257 # warn $collation->start->id;
258 $collation->start->rank($n->{'rank'});
261 if( defined $n->{'is_end'} ) {
262 # warn Data::Dump::dump($n);
263 $collation->end->rank( $n->{'rank'} );
266 my $gnode = $collation->add_reading( $n );
267 if( $gnode->id ne $n->{'id'} ) {
268 $namechange{$n->{'id'}} = $gnode->id;
273 # print STDERR "Adding collation path edges\n";
274 foreach my $e ( @{$graph_data->{'edges'}} ) {
275 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
276 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
277 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
278 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
279 my $from = $collation->reading( $sourceid );
280 my $to = $collation->reading( $targetid );
282 warn "No witness label on path edge!" unless $e->{'witness'};
283 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
284 $collation->add_path( $from, $to, $label );
286 # Add the witness if we don't have it already.
287 unless( $witnesses{$e->{'witness'}} ) {
288 $tradition->add_witness(
289 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
290 $witnesses{$e->{'witness'}} = 1;
292 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
295 ## Done with the main graph, now look at the relationships.
296 # Nodes are added via the call to add_reading above. We only need
297 # add the relationships themselves.
298 # TODO check that scoping does trt
299 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
300 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
301 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
302 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
303 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
304 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
305 my $from = $collation->reading( $sourceid );
306 my $to = $collation->reading( $targetid );
307 delete $e->{'source'};
308 delete $e->{'target'};
309 # The remaining keys are relationship attributes.
310 # Backward compatibility...
311 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
312 delete $e->{'class'};
313 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
315 # Add the specified relationship unless we already have done.
317 if( $e->{'scope'} ne 'local' ) {
318 my $relobj = $collation->get_relationship( $from, $to );
319 if( $relobj && $relobj->scope eq $e->{'scope'}
320 && $relobj->type eq $e->{'type'} ) {
325 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
326 } catch( Text::Tradition::Error $e ) {
327 warn "DROPPING $from -> $to: " . $e->message;
331 # Save the text for each witness so that we can ensure consistency
333 $collation->text_from_paths();
336 ## Return the relationship that comes first in priority.
345 my $key = exists $a->{'type'} ? 'type' : 'relationship';
346 my $at = $LAYERS{$a->{$key}} || 99;
347 my $bt = $LAYERS{$b->{$key}} || 99;
357 =item * Make this into a stream parser with GraphML
359 =item * Simply field -> attribute correspondence for nodes and edges
361 =item * Share key name constants with Collation.pm
367 This package is free software and is provided "as is" without express
368 or implied warranty. You can redistribute it and/or modify it under
369 the same terms as Perl itself.
373 Tara L Andrews E<lt>aurum@cpan.orgE<gt>