1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
10 Text::Tradition::Parser::GraphML
16 my $t_from_file = Text::Tradition->new(
19 'file' => '/path/to/tradition.xml'
22 my $t_from_string = Text::Tradition->new(
25 'string' => $tradition_xml,
30 Parser module for Text::Tradition to read in its own GraphML output format.
31 GraphML is a relatively simple graph description language; a 'graph' element
32 can have 'node' and 'edge' elements, and each of these can have simple 'data'
33 elements for attributes to be saved.
35 The graph itself has attributes as in the Collation object:
45 =item * wit_list_separator
49 The node objects have the following attributes:
65 The edge objects have the following attributes:
71 =item * witness (for 'path' class edges)
73 =item * extra (for 'path' class edges)
75 =item * relationship (for 'relationship' class edges)
77 =item * equal_rank (for 'relationship' class edges)
79 =item * non_correctable (for 'relationship' class edges)
81 =item * non_independent (for 'relationship' class edges)
89 parse( $graph, $opts );
91 Takes an initialized Text::Tradition object and a set of options; creates
92 the appropriate nodes and edges on the graph. The options hash should
93 include either a 'file' argument or a 'string' argument, depending on the
94 source of the XML to be parsed.
102 use Text::Tradition::Directory;
104 binmode STDOUT, ":utf8";
105 binmode STDERR, ":utf8";
106 eval { no warnings; binmode $DB::OUT, ":utf8"; };
108 my $tradition = 't/data/florilegium_graphml.xml';
109 my $t = Text::Tradition->new(
112 'file' => $tradition,
115 ok( $t->$_isa('Text::Tradition'), "Parsed GraphML version 2" );
117 is( scalar $t->collation->readings, 319, "Collation has all readings" );
118 is( scalar $t->collation->paths, 376, "Collation has all paths" );
119 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
122 # TODO add a relationship, add a stemma, write graphml, reparse it, check that
123 # the new data is there
124 $t->language('Greek');
127 $stemma_enabled = $t->enable_stemmata;
129 ok( 1, "Skipping stemma tests without Analysis module" );
131 if( $stemma_enabled ) {
132 $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
134 $t->collation->add_relationship( 'w12', 'w13',
135 { 'type' => 'grammatical', 'scope' => 'global',
136 'annotation' => 'This is some note' } );
137 ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
138 my $graphml_str = $t->collation->as_graphml;
140 my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
141 ok( $newt->$_isa('Text::Tradition'), "Parsed current GraphML version" );
143 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
144 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
145 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
146 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
147 is( $newt->language, 'Greek', "Tradition has correct language setting" );
148 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
149 ok( $rel, "Found set relationship" );
150 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
151 if( $stemma_enabled ) {
152 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
153 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
157 # Test user save / restore
158 my $fh = File::Temp->new();
159 my $file = $fh->filename;
161 my $dsn = "dbi:SQLite:dbname=$file";
162 my $userstore = Text::Tradition::Directory->new( { dsn => $dsn,
163 extra_args => { create => 1 } } );
164 my $scope = $userstore->new_scope();
165 my $testuser = $userstore->create_user( { url => 'http://example.com' } );
166 ok( $testuser->$_isa('Text::Tradition::User'), "Created test user via userstore" );
167 $testuser->add_tradition( $newt );
168 is( $newt->user->id, $testuser->id, "Assigned tradition to test user" );
169 $graphml_str = $newt->collation->as_graphml;
172 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
173 } 'DROPPING user assignment without a specified userstore',
174 "Got expected user drop warning on parse";
175 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str,
176 'userstore' => $userstore );
177 is( $usert->user->id, $testuser->id, "Parsed tradition with userstore points to correct user" );
179 # Test warning if we can
180 unless( $stemma_enabled ) {
183 $nst = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lexformat.xml' );
184 } [qr/DROPPING stemmata/],
185 "Got expected stemma drop warning on parse";
193 my( $tradition, $opts ) = @_;
195 # Collation data is in the first graph; relationship-specific stuff
197 my( $graph_data, $rel_data ) = graphml_parse( $opts );
199 my $collation = $tradition->collation;
202 # print STDERR "Setting graph globals\n";
203 $tradition->name( $graph_data->{'name'} );
206 my $tmeta = $tradition->meta;
207 my $cmeta = $collation->meta;
208 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
209 my $val = $graph_data->{'global'}->{$gkey};
210 if( $gkey eq 'version' ) {
212 } elsif( $gkey eq 'stemmata' ) {
213 # Make sure we can handle stemmata
216 $stemma_enabled = $tradition->enable_stemmata;
218 warn "Analysis module not installed; DROPPING stemmata";
220 # Parse the stemmata into objects
221 if( $stemma_enabled ) {
222 foreach my $dotstr ( split( /\n/, $val ) ) {
223 $tradition->add_stemma( 'dot' => $dotstr );
226 } elsif( $gkey eq 'user' ) {
227 # Assign the tradition to the user if we can
228 if( exists $opts->{'userstore'} ) {
229 my $userdir = delete $opts->{'userstore'};
230 my $user = $userdir->find_user( { username => $val } );
232 $user->add_tradition( $tradition );
234 warn( "Found no user with ID $val; DROPPING user assignment" );
237 warn( "DROPPING user assignment without a specified userstore" );
239 } elsif( $tmeta->has_attribute( $gkey ) ) {
240 $tradition->$gkey( $val );
242 $collation->$gkey( $val );
246 # Add the nodes to the graph.
247 # Note any reading IDs that were changed in order to comply with XML
248 # name restrictions; we have to hardcode start & end.
249 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
251 # print STDERR "Adding collation readings\n";
253 foreach my $n ( @{$graph_data->{'nodes'}} ) {
254 # If it is the start or end node, we already have one, so
255 # grab the rank and go.
256 if( defined $n->{'is_start'} ) {
257 $collation->start->rank($n->{'rank'});
260 if( defined $n->{'is_end'} ) {
261 $collation->end->rank( $n->{'rank'} );
264 # HACKY but no better way yet
265 # If $n has a 'lexemes' property then we will need the morphology for
266 # the whole tradition.
267 $need_morphology = 1 if exists $n->{'lexemes'};
268 my $gnode = $collation->add_reading( $n );
269 if( $gnode->id ne $n->{'id'} ) {
270 $namechange{$n->{'id'}} = $gnode->id;
273 # HACK continued - if any of the readings had morphology info, we
274 # must enable it for the whole tradition. Just eval it, as we will
275 # have already been warned if the morphology extension isn't installed.
276 if( $need_morphology ) {
277 eval { $tradition->enable_morphology };
281 # print STDERR "Adding collation path edges\n";
282 foreach my $e ( @{$graph_data->{'edges'}} ) {
283 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
284 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
285 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
286 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
287 my $from = $collation->reading( $sourceid );
288 my $to = $collation->reading( $targetid );
290 warn "No witness label on path edge!" unless $e->{'witness'};
291 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
292 $collation->add_path( $from, $to, $label );
294 # Add the witness if we don't have it already.
295 unless( $witnesses{$e->{'witness'}} ) {
296 $tradition->add_witness(
297 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
298 $witnesses{$e->{'witness'}} = 1;
300 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
303 ## Done with the main graph, now look at the relationships.
304 # Nodes are added via the call to add_reading above. We only need
305 # add the relationships themselves.
306 # TODO check that scoping does trt
307 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
308 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
309 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
310 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
311 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
312 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
313 my $from = $collation->reading( $sourceid );
314 my $to = $collation->reading( $targetid );
315 delete $e->{'source'};
316 delete $e->{'target'};
317 # The remaining keys are relationship attributes.
318 # Backward compatibility...
319 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
320 delete $e->{'class'};
321 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
323 # Add the specified relationship unless we already have done.
325 if( $e->{'scope'} ne 'local' ) {
326 my $relobj = $collation->get_relationship( $from, $to );
327 if( $relobj && $relobj->scope eq $e->{'scope'}
328 && $relobj->type eq $e->{'type'} ) {
333 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
334 } catch( Text::Tradition::Error $e ) {
335 warn "DROPPING $from -> $to: " . $e->message;
339 # Save the text for each witness so that we can ensure consistency
341 $collation->text_from_paths();
344 ## Return the relationship that comes first in priority.
353 my $key = exists $a->{'type'} ? 'type' : 'relationship';
354 my $at = $LAYERS{$a->{$key}} || 99;
355 my $bt = $LAYERS{$b->{$key}} || 99;
365 =item * Make this into a stream parser with GraphML
367 =item * Simply field -> attribute correspondence for nodes and edges
369 =item * Share key name constants with Collation.pm
375 This package is free software and is provided "as is" without express
376 or implied warranty. You can redistribute it and/or modify it under
377 the same terms as Perl itself.
381 Tara L Andrews E<lt>aurum@cpan.orgE<gt>