1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
10 Text::Tradition::Parser::GraphML
16 my $t_from_file = Text::Tradition->new(
19 'file' => '/path/to/tradition.xml'
22 my $t_from_string = Text::Tradition->new(
25 'string' => $tradition_xml,
30 Parser module for Text::Tradition to read in its own GraphML output format.
31 GraphML is a relatively simple graph description language; a 'graph' element
32 can have 'node' and 'edge' elements, and each of these can have simple 'data'
33 elements for attributes to be saved.
35 The graph itself has attributes as in the Collation object:
45 =item * wit_list_separator
49 The node objects have the following attributes:
65 The edge objects have the following attributes:
71 =item * witness (for 'path' class edges)
73 =item * extra (for 'path' class edges)
75 =item * relationship (for 'relationship' class edges)
77 =item * equal_rank (for 'relationship' class edges)
79 =item * non_correctable (for 'relationship' class edges)
81 =item * non_independent (for 'relationship' class edges)
89 parse( $graph, $opts );
91 Takes an initialized Text::Tradition object and a set of options; creates
92 the appropriate nodes and edges on the graph. The options hash should
93 include either a 'file' argument or a 'string' argument, depending on the
94 source of the XML to be parsed.
102 use Text::Tradition::Directory;
104 binmode STDOUT, ":utf8";
105 binmode STDERR, ":utf8";
106 eval { no warnings; binmode $DB::OUT, ":utf8"; };
108 my $tradition = 't/data/florilegium_graphml.xml';
109 my $t = Text::Tradition->new(
112 'file' => $tradition,
115 ok( $t->$_isa('Text::Tradition'), "Parsed GraphML version 2" );
117 is( scalar $t->collation->readings, 319, "Collation has all readings" );
118 is( scalar $t->collation->paths, 376, "Collation has all paths" );
119 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
122 # TODO add a relationship, add a stemma, write graphml, reparse it, check that
123 # the new data is there
124 my $language_enabled = $t->can('language');
125 if( $language_enabled ) {
126 $t->language('Greek');
128 my $stemma_enabled = $t->can('add_stemma');
129 if( $stemma_enabled ) {
130 $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
132 $t->collation->add_relationship( 'w12', 'w13',
133 { 'type' => 'grammatical', 'scope' => 'global',
134 'annotation' => 'This is some note' } );
135 ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
136 my $graphml_str = $t->collation->as_graphml;
138 my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
139 ok( $newt->$_isa('Text::Tradition'), "Parsed current GraphML version" );
141 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
142 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
143 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
144 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
145 if( $language_enabled ) {
146 is( $newt->language, 'Greek', "Tradition has correct language setting" );
148 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
149 ok( $rel, "Found set relationship" );
150 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
151 if( $stemma_enabled ) {
152 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
153 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
157 # Test user save / restore
158 my $fh = File::Temp->new();
159 my $file = $fh->filename;
161 my $dsn = "dbi:SQLite:dbname=$file";
162 my $userstore = Text::Tradition::Directory->new( { dsn => $dsn,
163 extra_args => { create => 1 } } );
164 my $scope = $userstore->new_scope();
165 my $testuser = $userstore->create_user( { url => 'http://example.com' } );
166 ok( $testuser->$_isa('Text::Tradition::User'), "Created test user via userstore" );
167 $testuser->add_tradition( $newt );
168 is( $newt->user->id, $testuser->id, "Assigned tradition to test user" );
169 $graphml_str = $newt->collation->as_graphml;
172 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
173 } 'DROPPING user assignment without a specified userstore',
174 "Got expected user drop warning on parse";
175 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str,
176 'userstore' => $userstore );
177 is( $usert->user->id, $testuser->id, "Parsed tradition with userstore points to correct user" );
179 # Test warning if we can
180 unless( $stemma_enabled ) {
183 $nst = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lexformat.xml' );
184 } [qr/DROPPING stemmata/],
185 "Got expected stemma drop warning on parse";
193 my( $tradition, $opts ) = @_;
195 # Collation data is in the first graph; relationship-specific stuff
197 my( $graph_data, $rel_data ) = graphml_parse( $opts );
199 my $collation = $tradition->collation;
202 # print STDERR "Setting graph globals\n";
203 $tradition->name( $graph_data->{'name'} );
206 my $tmeta = $tradition->meta;
207 my $cmeta = $collation->meta;
208 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
209 my $val = $graph_data->{'global'}->{$gkey};
210 if( $gkey eq 'version' ) {
212 } elsif( $gkey eq 'stemmata' ) {
213 # Make sure we can handle stemmata
214 # Parse the stemmata into objects
215 if( $tradition->can('add_stemma') ) {
216 foreach my $dotstr ( split( /\n/, $val ) ) {
217 $tradition->add_stemma( 'dot' => $dotstr );
220 warn "Analysis module not installed; DROPPING stemmata";
222 } elsif( $gkey eq 'language' ) {
223 if( $tradition->can('language') ) {
224 $tradition->language( $val );
226 warn "Morphology module not installed; DROPPING language";
228 } elsif( $gkey eq 'user' ) {
229 # Assign the tradition to the user if we can
230 if( exists $opts->{'userstore'} ) {
231 my $userdir = delete $opts->{'userstore'};
232 my $user = $userdir->find_user( { username => $val } );
234 $user->add_tradition( $tradition );
236 warn( "Found no user with ID $val; DROPPING user assignment" );
239 warn( "DROPPING user assignment without a specified userstore" );
241 } elsif( $tmeta->has_attribute( $gkey ) ) {
242 $tradition->$gkey( $val );
244 $collation->$gkey( $val );
248 # Add the nodes to the graph.
249 # Note any reading IDs that were changed in order to comply with XML
250 # name restrictions; we have to hardcode start & end.
251 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
253 # print STDERR "Adding collation readings\n";
254 foreach my $n ( @{$graph_data->{'nodes'}} ) {
255 # If it is the start or end node, we already have one, so
256 # grab the rank and go.
257 if( defined $n->{'is_start'} ) {
258 $collation->start->rank($n->{'rank'});
261 if( defined $n->{'is_end'} ) {
262 $collation->end->rank( $n->{'rank'} );
265 my $gnode = $collation->add_reading( $n );
266 if( $gnode->id ne $n->{'id'} ) {
267 $namechange{$n->{'id'}} = $gnode->id;
272 # print STDERR "Adding collation path edges\n";
273 foreach my $e ( @{$graph_data->{'edges'}} ) {
274 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
275 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
276 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
277 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
278 my $from = $collation->reading( $sourceid );
279 my $to = $collation->reading( $targetid );
281 warn "No witness label on path edge!" unless $e->{'witness'};
282 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
283 $collation->add_path( $from, $to, $label );
285 # Add the witness if we don't have it already.
286 unless( $witnesses{$e->{'witness'}} ) {
287 $tradition->add_witness(
288 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
289 $witnesses{$e->{'witness'}} = 1;
291 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
294 ## Done with the main graph, now look at the relationships.
295 # Nodes are added via the call to add_reading above. We only need
296 # add the relationships themselves.
297 # TODO check that scoping does trt
298 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
299 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
300 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
301 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
302 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
303 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
304 my $from = $collation->reading( $sourceid );
305 my $to = $collation->reading( $targetid );
306 delete $e->{'source'};
307 delete $e->{'target'};
308 # The remaining keys are relationship attributes.
309 # Backward compatibility...
310 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
311 delete $e->{'class'};
312 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
314 # Add the specified relationship unless we already have done.
316 if( $e->{'scope'} ne 'local' ) {
317 my $relobj = $collation->get_relationship( $from, $to );
318 if( $relobj && $relobj->scope eq $e->{'scope'}
319 && $relobj->type eq $e->{'type'} ) {
324 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
325 } catch( Text::Tradition::Error $e ) {
326 warn "DROPPING $from -> $to: " . $e->message;
330 # Save the text for each witness so that we can ensure consistency
332 $collation->text_from_paths();
335 ## Return the relationship that comes first in priority.
344 my $key = exists $a->{'type'} ? 'type' : 'relationship';
345 my $at = $LAYERS{$a->{$key}} || 99;
346 my $bt = $LAYERS{$b->{$key}} || 99;
356 =item * Make this into a stream parser with GraphML
358 =item * Simply field -> attribute correspondence for nodes and edges
360 =item * Share key name constants with Collation.pm
366 This package is free software and is provided "as is" without express
367 or implied warranty. You can redistribute it and/or modify it under
368 the same terms as Perl itself.
372 Tara L Andrews E<lt>aurum@cpan.orgE<gt>