1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
6 use Text::Tradition::UserStore;
11 Text::Tradition::Parser::GraphML
17 my $t_from_file = Text::Tradition->new(
20 'file' => '/path/to/tradition.xml'
23 my $t_from_string = Text::Tradition->new(
26 'string' => $tradition_xml,
31 Parser module for Text::Tradition to read in its own GraphML output format.
32 GraphML is a relatively simple graph description language; a 'graph' element
33 can have 'node' and 'edge' elements, and each of these can have simple 'data'
34 elements for attributes to be saved.
36 The graph itself has attributes as in the Collation object:
46 =item * wit_list_separator
50 The node objects have the following attributes:
66 The edge objects have the following attributes:
72 =item * witness (for 'path' class edges)
74 =item * extra (for 'path' class edges)
76 =item * relationship (for 'relationship' class edges)
78 =item * equal_rank (for 'relationship' class edges)
80 =item * non_correctable (for 'relationship' class edges)
82 =item * non_independent (for 'relationship' class edges)
90 parse( $graph, $opts );
92 Takes an initialized Text::Tradition object and a set of options; creates
93 the appropriate nodes and edges on the graph. The options hash should
94 include either a 'file' argument or a 'string' argument, depending on the
95 source of the XML to be parsed.
102 binmode STDOUT, ":utf8";
103 binmode STDERR, ":utf8";
104 eval { no warnings; binmode $DB::OUT, ":utf8"; };
106 my $tradition = 't/data/florilegium_graphml.xml';
107 my $t = Text::Tradition->new(
110 'file' => $tradition,
113 is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" );
115 is( scalar $t->collation->readings, 319, "Collation has all readings" );
116 is( scalar $t->collation->paths, 376, "Collation has all paths" );
117 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
120 # TODO add a relationship, add a stemma, write graphml, reparse it, check that
121 # the new data is there
122 $t->language('Greek');
123 $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
124 $t->collation->add_relationship( 'w12', 'w13',
125 { 'type' => 'grammatical', 'scope' => 'global',
126 'annotation' => 'This is some note' } );
127 ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
128 my $graphml_str = $t->collation->as_graphml;
130 my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
131 is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" );
133 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
134 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
135 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
136 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
137 is( $newt->language, 'Greek', "Tradition has correct language setting" );
138 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
139 ok( $rel, "Found set relationship" );
140 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
141 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
142 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
145 # Test user save / restore
146 my $fh = File::Temp->new();
147 my $file = $fh->filename;
149 my $dsn = "dbi:SQLite:dbname=$file";
150 my $userstore = Text::Tradition::Directory->new( { dsn => $dsn,
151 extra_args => { create => 1 } } );
152 my $scope = $userstore->new_scope();
153 my $testuser = $userstore->create_user( { url => 'http://example.com' } );
154 is( ref( $testuser ), 'Text::Tradition::User', "Created test user via userstore" );
155 $testuser->add_tradition( $newt );
156 is( $newt->user->id, $testuser->id, "Assigned tradition to test user" );
157 $graphml_str = $newt->collation->as_graphml;
160 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
161 } 'DROPPING user assignment without a specified userstore',
162 "Got expected user drop warning on parse";
163 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str,
164 'userstore' => $userstore );
165 is( $usert->user->id, $testuser->id, "Parsed tradition with userstore points to correct user" );
173 my( $tradition, $opts ) = @_;
175 # Collation data is in the first graph; relationship-specific stuff
177 my( $graph_data, $rel_data ) = graphml_parse( $opts );
179 my $collation = $tradition->collation;
182 # print STDERR "Setting graph globals\n";
183 $tradition->name( $graph_data->{'name'} );
186 my $tmeta = $tradition->meta;
187 my $cmeta = $collation->meta;
188 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
189 my $val = $graph_data->{'global'}->{$gkey};
190 if( $gkey eq 'version' ) {
192 } elsif( $gkey eq 'stemmata' ) {
193 # Parse the stemmata into objects
194 foreach my $dotstr ( split( /\n/, $val ) ) {
195 $tradition->add_stemma( 'dot' => $dotstr );
197 } elsif( $gkey eq 'user' ) {
198 # Assign the tradition to the user if we can
199 if( exists $opts->{'userstore'} ) {
200 my $userdir = delete $opts->{'userstore'};
201 my $user = $userdir->find_user( { username => $val } );
203 $user->add_tradition( $tradition );
205 warn( "Found no user with ID $val; DROPPING user assignment" );
208 warn( "DROPPING user assignment without a specified userstore" );
210 } elsif( $tmeta->has_attribute( $gkey ) ) {
211 $tradition->$gkey( $val );
213 $collation->$gkey( $val );
217 # Add the nodes to the graph.
218 # Note any reading IDs that were changed in order to comply with XML
219 # name restrictions; we have to hardcode start & end.
220 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
222 # print STDERR "Adding collation readings\n";
223 foreach my $n ( @{$graph_data->{'nodes'}} ) {
224 # If it is the start or end node, we already have one, so
225 # grab the rank and go.
226 if( defined $n->{'is_start'} ) {
227 # warn Data::Dump::dump($n);
228 # warn $collation->start->id;
229 $collation->start->rank($n->{'rank'});
232 if( defined $n->{'is_end'} ) {
233 # warn Data::Dump::dump($n);
234 $collation->end->rank( $n->{'rank'} );
237 my $gnode = $collation->add_reading( $n );
238 if( $gnode->id ne $n->{'id'} ) {
239 $namechange{$n->{'id'}} = $gnode->id;
244 # print STDERR "Adding collation path edges\n";
245 foreach my $e ( @{$graph_data->{'edges'}} ) {
246 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
247 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
248 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
249 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
250 my $from = $collation->reading( $sourceid );
251 my $to = $collation->reading( $targetid );
253 warn "No witness label on path edge!" unless $e->{'witness'};
254 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
255 $collation->add_path( $from, $to, $label );
257 # Add the witness if we don't have it already.
258 unless( $witnesses{$e->{'witness'}} ) {
259 $tradition->add_witness(
260 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
261 $witnesses{$e->{'witness'}} = 1;
263 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
266 ## Done with the main graph, now look at the relationships.
267 # Nodes are added via the call to add_reading above. We only need
268 # add the relationships themselves.
269 # TODO check that scoping does trt
270 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
271 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
272 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
273 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
274 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
275 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
276 my $from = $collation->reading( $sourceid );
277 my $to = $collation->reading( $targetid );
278 delete $e->{'source'};
279 delete $e->{'target'};
280 # The remaining keys are relationship attributes.
281 # Backward compatibility...
282 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
283 delete $e->{'class'};
284 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
286 # Add the specified relationship unless we already have done.
288 if( $e->{'scope'} ne 'local' ) {
289 my $relobj = $collation->get_relationship( $from, $to );
290 if( $relobj && $relobj->scope eq $e->{'scope'}
291 && $relobj->type eq $e->{'type'} ) {
296 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
297 } catch( Text::Tradition::Error $e ) {
298 warn "DROPPING $from -> $to: " . $e->message;
302 # Save the text for each witness so that we can ensure consistency
304 $collation->text_from_paths();
307 ## Return the relationship that comes first in priority.
316 my $key = exists $a->{'type'} ? 'type' : 'relationship';
317 my $at = $LAYERS{$a->{$key}} || 99;
318 my $bt = $LAYERS{$b->{$key}} || 99;
328 =item * Make this into a stream parser with GraphML
330 =item * Simply field -> attribute correspondence for nodes and edges
332 =item * Share key name constants with Collation.pm
338 This package is free software and is provided "as is" without express
339 or implied warranty. You can redistribute it and/or modify it under
340 the same terms as Perl itself.
344 Tara L Andrews E<lt>aurum@cpan.orgE<gt>