1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
6 use Text::Tradition::UserStore;
11 Text::Tradition::Parser::GraphML
17 my $t_from_file = Text::Tradition->new(
20 'file' => '/path/to/tradition.xml'
23 my $t_from_string = Text::Tradition->new(
26 'string' => $tradition_xml,
31 Parser module for Text::Tradition to read in its own GraphML output format.
32 GraphML is a relatively simple graph description language; a 'graph' element
33 can have 'node' and 'edge' elements, and each of these can have simple 'data'
34 elements for attributes to be saved.
36 The graph itself has attributes as in the Collation object:
46 =item * wit_list_separator
50 The node objects have the following attributes:
66 The edge objects have the following attributes:
72 =item * witness (for 'path' class edges)
74 =item * extra (for 'path' class edges)
76 =item * relationship (for 'relationship' class edges)
78 =item * equal_rank (for 'relationship' class edges)
80 =item * non_correctable (for 'relationship' class edges)
82 =item * non_independent (for 'relationship' class edges)
90 parse( $graph, $opts );
92 Takes an initialized Text::Tradition object and a set of options; creates
93 the appropriate nodes and edges on the graph. The options hash should
94 include either a 'file' argument or a 'string' argument, depending on the
95 source of the XML to be parsed.
102 binmode STDOUT, ":utf8";
103 binmode STDERR, ":utf8";
104 eval { no warnings; binmode $DB::OUT, ":utf8"; };
106 my $tradition = 't/data/florilegium_graphml.xml';
107 my $t = Text::Tradition->new(
110 'file' => $tradition,
113 is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" );
115 is( scalar $t->collation->readings, 319, "Collation has all readings" );
116 is( scalar $t->collation->paths, 376, "Collation has all paths" );
117 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
120 # TODO add a relationship, add a stemma, write graphml, reparse it, check that
121 # the new data is there
122 $t->language('Greek');
123 $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
124 $t->collation->add_relationship( 'w12', 'w13',
125 { 'type' => 'grammatical', 'scope' => 'global',
126 'annotation' => 'This is some note' } );
127 ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
128 my $graphml_str = $t->collation->as_graphml;
130 my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
131 is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" );
133 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
134 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
135 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
136 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
137 is( $newt->language, 'Greek', "Tradition has correct language setting" );
138 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
139 ok( $rel, "Found set relationship" );
140 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
141 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
142 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
145 # Test user save / restore
146 my $fh = File::Temp->new();
147 my $file = $fh->filename;
149 my $dsn = "dbi:SQLite:dbname=$file";
150 my $userstore = Text::Tradition::UserStore->new( { dsn => $dsn,
151 extra_args => { create => 1 } } );
152 my $scope = $userstore->new_scope();
153 my $testuser = $userstore->add_user( { url => 'http://example.com' } );
154 is( ref( $testuser ), 'Text::Tradition::User', "Created test user via userstore" );
155 $testuser->add_tradition( $newt );
156 is( $newt->user->id, $testuser->id, "Assigned tradition to test user" );
157 $graphml_str = $newt->collation->as_graphml;
160 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
161 } 'DROPPING user assignment without a specified userstore',
162 "Got expected user drop warning on parse";
163 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str,
164 'userstore' => { 'dsn' => $dsn } );
165 is( $usert->user->id, $testuser->id, "Parsed tradition with userstore points to correct user" );
173 my( $tradition, $opts ) = @_;
175 # Collation data is in the first graph; relationship-specific stuff
177 my( $graph_data, $rel_data ) = graphml_parse( $opts );
179 my $collation = $tradition->collation;
182 # print STDERR "Setting graph globals\n";
183 $tradition->name( $graph_data->{'name'} );
185 my $tmeta = $tradition->meta;
186 my $cmeta = $collation->meta;
187 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
188 my $val = $graph_data->{'global'}->{$gkey};
189 if( $gkey eq 'version' ) {
191 } elsif( $gkey eq 'stemmata' ) {
192 # Parse the stemmata into objects
193 foreach my $dotstr ( split( /\n/, $val ) ) {
194 $tradition->add_stemma( 'dot' => $dotstr );
196 } elsif( $gkey eq 'user' ) {
197 # Assign the tradition to the user if we can
198 if( exists $opts->{'userstore'} ) {
201 $userdir = Text::Tradition::UserStore->new( $opts->{'userstore'} );
203 warn( "Could not connect to specified user store; DROPPING user assignment" );
205 my $user = $userdir->find_user( { username => $val } );
207 $user->add_tradition( $tradition );
209 warn( "Found no user with ID $val; DROPPING user assignment" );
212 warn( "DROPPING user assignment without a specified userstore" );
214 } elsif( $tmeta->has_attribute( $gkey ) ) {
215 $tradition->$gkey( $val );
217 $collation->$gkey( $val );
221 # Add the nodes to the graph.
222 # Note any reading IDs that were changed in order to comply with XML
223 # name restrictions; we have to hardcode start & end.
224 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
226 # print STDERR "Adding collation readings\n";
227 foreach my $n ( @{$graph_data->{'nodes'}} ) {
228 # If it is the start or end node, we already have one, so
229 # grab the rank and go.
230 next if( defined $n->{'is_start'} );
231 if( defined $n->{'is_end'} ) {
232 $collation->end->rank( $n->{'rank'} );
235 my $gnode = $collation->add_reading( $n );
236 if( $gnode->id ne $n->{'id'} ) {
237 $namechange{$n->{'id'}} = $gnode->id;
242 # print STDERR "Adding collation path edges\n";
243 foreach my $e ( @{$graph_data->{'edges'}} ) {
244 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
245 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
246 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
247 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
248 my $from = $collation->reading( $sourceid );
249 my $to = $collation->reading( $targetid );
251 warn "No witness label on path edge!" unless $e->{'witness'};
252 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
253 $collation->add_path( $from, $to, $label );
255 # Add the witness if we don't have it already.
256 unless( $witnesses{$e->{'witness'}} ) {
257 $tradition->add_witness(
258 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
259 $witnesses{$e->{'witness'}} = 1;
261 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
264 ## Done with the main graph, now look at the relationships.
265 # Nodes are added via the call to add_reading above. We only need
266 # add the relationships themselves.
267 # TODO check that scoping does trt
268 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
269 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
270 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
271 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
272 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
273 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
274 my $from = $collation->reading( $sourceid );
275 my $to = $collation->reading( $targetid );
276 delete $e->{'source'};
277 delete $e->{'target'};
278 # The remaining keys are relationship attributes.
279 # Backward compatibility...
280 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
281 delete $e->{'class'};
282 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
284 # Add the specified relationship unless we already have done.
286 if( $e->{'scope'} ne 'local' ) {
287 my $relobj = $collation->get_relationship( $from, $to );
288 if( $relobj && $relobj->scope eq $e->{'scope'}
289 && $relobj->type eq $e->{'type'} ) {
294 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
295 } catch( Text::Tradition::Error $e ) {
296 warn "DROPPING $from -> $to: " . $e->message;
300 # Save the text for each witness so that we can ensure consistency
302 $collation->text_from_paths();
305 ## Return the relationship that comes first in priority.
314 my $key = exists $a->{'type'} ? 'type' : 'relationship';
315 my $at = $LAYERS{$a->{$key}} || 99;
316 my $bt = $LAYERS{$b->{$key}} || 99;
326 =item * Make this into a stream parser with GraphML
328 =item * Simply field -> attribute correspondence for nodes and edges
330 =item * Share key name constants with Collation.pm
336 This package is free software and is provided "as is" without express
337 or implied warranty. You can redistribute it and/or modify it under
338 the same terms as Perl itself.
342 Tara L Andrews E<lt>aurum@cpan.orgE<gt>