import and export users in GraphML
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
1f7aa795 5use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
9fef629b 6use Text::Tradition::UserStore;
00c5bf0b 7use TryCatch;
32014ec9 8
9=head1 NAME
10
11Text::Tradition::Parser::GraphML
12
e867486f 13=head1 SYNOPSIS
14
15 use Text::Tradition;
16
17 my $t_from_file = Text::Tradition->new(
18 'name' => 'my text',
19 'input' => 'Self',
20 'file' => '/path/to/tradition.xml'
21 );
22
23 my $t_from_string = Text::Tradition->new(
24 'name' => 'my text',
25 'input' => 'Self',
26 'string' => $tradition_xml,
27 );
28
32014ec9 29=head1 DESCRIPTION
30
31Parser module for Text::Tradition to read in its own GraphML output format.
e867486f 32GraphML is a relatively simple graph description language; a 'graph' element
33can have 'node' and 'edge' elements, and each of these can have simple 'data'
34elements for attributes to be saved.
32014ec9 35
e867486f 36The graph itself has attributes as in the Collation object:
37
38=over
39
40=item * linear
41
42=item * ac_label
43
44=item * baselabel
45
46=item * wit_list_separator
47
48=back
49
50The node objects have the following attributes:
32014ec9 51
52=over
53
e867486f 54=item * name
55
56=item * reading
57
58=item * identical
59
60=item * rank
61
62=item * class
63
64=back
65
66The edge objects have the following attributes:
67
68=over
69
70=item * class
71
72=item * witness (for 'path' class edges)
73
74=item * extra (for 'path' class edges)
75
76=item * relationship (for 'relationship' class edges)
77
78=item * equal_rank (for 'relationship' class edges)
32014ec9 79
e867486f 80=item * non_correctable (for 'relationship' class edges)
32014ec9 81
e867486f 82=item * non_independent (for 'relationship' class edges)
83
84=back
85
86=head1 METHODS
87
88=head2 B<parse>
89
90parse( $graph, $opts );
91
92Takes an initialized Text::Tradition object and a set of options; creates
93the appropriate nodes and edges on the graph. The options hash should
94include either a 'file' argument or a 'string' argument, depending on the
95source of the XML to be parsed.
96
97=begin testing
98
9fef629b 99use File::Temp;
100use Test::Warn;
e867486f 101use Text::Tradition;
102binmode STDOUT, ":utf8";
103binmode STDERR, ":utf8";
104eval { no warnings; binmode $DB::OUT, ":utf8"; };
105
106my $tradition = 't/data/florilegium_graphml.xml';
107my $t = Text::Tradition->new(
108 'name' => 'inline',
109 'input' => 'Self',
110 'file' => $tradition,
111 );
112
bbd064a9 113is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" );
e867486f 114if( $t ) {
115 is( scalar $t->collation->readings, 319, "Collation has all readings" );
255875b8 116 is( scalar $t->collation->paths, 376, "Collation has all paths" );
e867486f 117 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
118}
119
2a812726 120# TODO add a relationship, add a stemma, write graphml, reparse it, check that
121# the new data is there
bbd064a9 122$t->language('Greek');
2a812726 123$t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
bbd064a9 124$t->collation->add_relationship( 'w12', 'w13',
125 { 'type' => 'grammatical', 'scope' => 'global',
126 'annotation' => 'This is some note' } );
127ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
128my $graphml_str = $t->collation->as_graphml;
129
130my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
131is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" );
132if( $newt ) {
133 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
134 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
135 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
136 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
137 is( $newt->language, 'Greek', "Tradition has correct language setting" );
138 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
139 ok( $rel, "Found set relationship" );
140 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
2a812726 141 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
142 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
bbd064a9 143}
144
9fef629b 145# Test user save / restore
146my $fh = File::Temp->new();
147my $file = $fh->filename;
148$fh->close;
149my $dsn = "dbi:SQLite:dbname=$file";
150my $userstore = Text::Tradition::UserStore->new( { dsn => $dsn,
151 extra_args => { create => 1 } } );
152my $scope = $userstore->new_scope();
153my $testuser = $userstore->add_user( { url => 'http://example.com' } );
154is( ref( $testuser ), 'Text::Tradition::User', "Created test user via userstore" );
155$testuser->add_tradition( $newt );
156is( $newt->user->id, $testuser->id, "Assigned tradition to test user" );
157$graphml_str = $newt->collation->as_graphml;
158my $usert;
159warning_is {
160 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
161} 'DROPPING user assignment without a specified userstore',
162 "Got expected user drop warning on parse";
163$usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str,
164 'userstore' => { 'dsn' => $dsn } );
165is( $usert->user->id, $testuser->id, "Parsed tradition with userstore points to correct user" );
166
bbd064a9 167
e867486f 168=end testing
32014ec9 169
170=cut
171
32014ec9 172sub parse {
dfc37e38 173 my( $tradition, $opts ) = @_;
2626f709 174
175 # Collation data is in the first graph; relationship-specific stuff
176 # is in the second.
177 my( $graph_data, $rel_data ) = graphml_parse( $opts );
94c00c71 178
32014ec9 179 my $collation = $tradition->collation;
180 my %witnesses;
e309421a 181
0068967c 182 # print STDERR "Setting graph globals\n";
e3196b2a 183 $tradition->name( $graph_data->{'name'} );
2626f709 184 my $use_version;
bbd064a9 185 my $tmeta = $tradition->meta;
186 my $cmeta = $collation->meta;
255875b8 187 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
188 my $val = $graph_data->{'global'}->{$gkey};
189 if( $gkey eq 'version' ) {
190 $use_version = $val;
9fef629b 191 } elsif( $gkey eq 'stemmata' ) {
192 # Parse the stemmata into objects
2a812726 193 foreach my $dotstr ( split( /\n/, $val ) ) {
194 $tradition->add_stemma( 'dot' => $dotstr );
195 }
9fef629b 196 } elsif( $gkey eq 'user' ) {
197 # Assign the tradition to the user if we can
198 if( exists $opts->{'userstore'} ) {
199 my $userdir;
200 try {
201 $userdir = Text::Tradition::UserStore->new( $opts->{'userstore'} );
202 } catch {
203 warn( "Could not connect to specified user store; DROPPING user assignment" );
204 }
205 my $user = $userdir->find_user( { username => $val } );
206 if( $user ) {
207 $user->add_tradition( $tradition );
208 } else {
209 warn( "Found no user with ID $val; DROPPING user assignment" );
210 }
211 } else {
212 warn( "DROPPING user assignment without a specified userstore" );
213 }
bbd064a9 214 } elsif( $tmeta->has_attribute( $gkey ) ) {
215 $tradition->$gkey( $val );
255875b8 216 } else {
217 $collation->$gkey( $val );
218 }
219 }
e309421a 220
10e4b1ac 221 # Add the nodes to the graph.
222 # Note any reading IDs that were changed in order to comply with XML
223 # name restrictions; we have to hardcode start & end.
224 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
32014ec9 225
bbd064a9 226 # print STDERR "Adding collation readings\n";
2626f709 227 foreach my $n ( @{$graph_data->{'nodes'}} ) {
0174d6a9 228 # If it is the start or end node, we already have one, so
229 # grab the rank and go.
bbd064a9 230 next if( defined $n->{'is_start'} );
231 if( defined $n->{'is_end'} ) {
232 $collation->end->rank( $n->{'rank'} );
0174d6a9 233 next;
234 }
bbd064a9 235 my $gnode = $collation->add_reading( $n );
10e4b1ac 236 if( $gnode->id ne $n->{'id'} ) {
237 $namechange{$n->{'id'}} = $gnode->id;
238 }
32014ec9 239 }
910a0a6d 240
32014ec9 241 # Now add the edges.
bbd064a9 242 # print STDERR "Adding collation path edges\n";
32014ec9 243 foreach my $e ( @{$graph_data->{'edges'}} ) {
10e4b1ac 244 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
245 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
246 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
247 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
248 my $from = $collation->reading( $sourceid );
249 my $to = $collation->reading( $targetid );
bbd064a9 250
251 warn "No witness label on path edge!" unless $e->{'witness'};
252 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
253 $collation->add_path( $from, $to, $label );
254
2626f709 255 # Add the witness if we don't have it already.
bbd064a9 256 unless( $witnesses{$e->{'witness'}} ) {
82fa4d57 257 $tradition->add_witness(
258 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
bbd064a9 259 $witnesses{$e->{'witness'}} = 1;
255875b8 260 }
bbd064a9 261 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
32014ec9 262 }
2626f709 263
264 ## Done with the main graph, now look at the relationships.
265 # Nodes are added via the call to add_reading above. We only need
266 # add the relationships themselves.
267 # TODO check that scoping does trt
bf6e338d 268 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
269 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
10e4b1ac 270 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
271 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
272 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
273 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
274 my $from = $collation->reading( $sourceid );
275 my $to = $collation->reading( $targetid );
bbd064a9 276 delete $e->{'source'};
277 delete $e->{'target'};
278 # The remaining keys are relationship attributes.
279 # Backward compatibility...
280 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
281 delete $e->{'class'};
282 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
283 }
284 # Add the specified relationship unless we already have done.
fdfa59a7 285 my $rel_exists;
bbd064a9 286 if( $e->{'scope'} ne 'local' ) {
287 my $relobj = $collation->get_relationship( $from, $to );
288 if( $relobj && $relobj->scope eq $e->{'scope'}
289 && $relobj->type eq $e->{'type'} ) {
fdfa59a7 290 $rel_exists = 1;
291 }
292 }
00c5bf0b 293 try {
294 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
295 } catch( Text::Tradition::Error $e ) {
296 warn "DROPPING $from -> $to: " . $e->message;
297 }
2626f709 298 }
861c3e27 299
300 # Save the text for each witness so that we can ensure consistency
301 # later on
bbd064a9 302 $collation->text_from_paths();
32014ec9 303}
304
bf6e338d 305## Return the relationship that comes first in priority.
306my %LAYERS = (
307 'collated' => 1,
308 'orthographic' => 2,
309 'spelling' => 3,
310 );
311
312sub _layersort_rel {
313 my( $a, $b ) = @_;
314 my $key = exists $a->{'type'} ? 'type' : 'relationship';
315 my $at = $LAYERS{$a->{$key}} || 99;
316 my $bt = $LAYERS{$b->{$key}} || 99;
317 return $at <=> $bt;
318}
319
e867486f 3201;
321
322=head1 BUGS / TODO
323
324=over
325
326=item * Make this into a stream parser with GraphML
327
328=item * Simply field -> attribute correspondence for nodes and edges
329
330=item * Share key name constants with Collation.pm
331
32014ec9 332=back
333
334=head1 LICENSE
335
336This package is free software and is provided "as is" without express
337or implied warranty. You can redistribute it and/or modify it under
338the same terms as Perl itself.
339
340=head1 AUTHOR
341
e867486f 342Tara L Andrews E<lt>aurum@cpan.orgE<gt>