disregard Analysis/Result.pm
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
1f7aa795 5use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
9fef629b 6use Text::Tradition::UserStore;
00c5bf0b 7use TryCatch;
32014ec9 8
9=head1 NAME
10
11Text::Tradition::Parser::GraphML
12
e867486f 13=head1 SYNOPSIS
14
15 use Text::Tradition;
16
17 my $t_from_file = Text::Tradition->new(
18 'name' => 'my text',
19 'input' => 'Self',
20 'file' => '/path/to/tradition.xml'
21 );
22
23 my $t_from_string = Text::Tradition->new(
24 'name' => 'my text',
25 'input' => 'Self',
26 'string' => $tradition_xml,
27 );
28
32014ec9 29=head1 DESCRIPTION
30
31Parser module for Text::Tradition to read in its own GraphML output format.
e867486f 32GraphML is a relatively simple graph description language; a 'graph' element
33can have 'node' and 'edge' elements, and each of these can have simple 'data'
34elements for attributes to be saved.
32014ec9 35
e867486f 36The graph itself has attributes as in the Collation object:
37
38=over
39
40=item * linear
41
42=item * ac_label
43
44=item * baselabel
45
46=item * wit_list_separator
47
48=back
49
50The node objects have the following attributes:
32014ec9 51
52=over
53
e867486f 54=item * name
55
56=item * reading
57
58=item * identical
59
60=item * rank
61
62=item * class
63
64=back
65
66The edge objects have the following attributes:
67
68=over
69
70=item * class
71
72=item * witness (for 'path' class edges)
73
74=item * extra (for 'path' class edges)
75
76=item * relationship (for 'relationship' class edges)
77
78=item * equal_rank (for 'relationship' class edges)
32014ec9 79
e867486f 80=item * non_correctable (for 'relationship' class edges)
32014ec9 81
e867486f 82=item * non_independent (for 'relationship' class edges)
83
84=back
85
86=head1 METHODS
87
88=head2 B<parse>
89
90parse( $graph, $opts );
91
92Takes an initialized Text::Tradition object and a set of options; creates
93the appropriate nodes and edges on the graph. The options hash should
94include either a 'file' argument or a 'string' argument, depending on the
95source of the XML to be parsed.
96
97=begin testing
98
9fef629b 99use File::Temp;
100use Test::Warn;
e867486f 101use Text::Tradition;
102binmode STDOUT, ":utf8";
103binmode STDERR, ":utf8";
104eval { no warnings; binmode $DB::OUT, ":utf8"; };
105
106my $tradition = 't/data/florilegium_graphml.xml';
107my $t = Text::Tradition->new(
108 'name' => 'inline',
109 'input' => 'Self',
110 'file' => $tradition,
111 );
112
bbd064a9 113is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" );
e867486f 114if( $t ) {
115 is( scalar $t->collation->readings, 319, "Collation has all readings" );
255875b8 116 is( scalar $t->collation->paths, 376, "Collation has all paths" );
e867486f 117 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
118}
119
2a812726 120# TODO add a relationship, add a stemma, write graphml, reparse it, check that
121# the new data is there
bbd064a9 122$t->language('Greek');
2a812726 123$t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
bbd064a9 124$t->collation->add_relationship( 'w12', 'w13',
125 { 'type' => 'grammatical', 'scope' => 'global',
126 'annotation' => 'This is some note' } );
127ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
128my $graphml_str = $t->collation->as_graphml;
129
130my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
131is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" );
132if( $newt ) {
133 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
134 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
135 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
136 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
137 is( $newt->language, 'Greek', "Tradition has correct language setting" );
138 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
139 ok( $rel, "Found set relationship" );
140 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
2a812726 141 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
142 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
bbd064a9 143}
144
9fef629b 145# Test user save / restore
146my $fh = File::Temp->new();
147my $file = $fh->filename;
148$fh->close;
149my $dsn = "dbi:SQLite:dbname=$file";
150my $userstore = Text::Tradition::UserStore->new( { dsn => $dsn,
151 extra_args => { create => 1 } } );
152my $scope = $userstore->new_scope();
153my $testuser = $userstore->add_user( { url => 'http://example.com' } );
154is( ref( $testuser ), 'Text::Tradition::User', "Created test user via userstore" );
155$testuser->add_tradition( $newt );
156is( $newt->user->id, $testuser->id, "Assigned tradition to test user" );
157$graphml_str = $newt->collation->as_graphml;
158my $usert;
159warning_is {
160 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
161} 'DROPPING user assignment without a specified userstore',
162 "Got expected user drop warning on parse";
163$usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str,
164 'userstore' => { 'dsn' => $dsn } );
165is( $usert->user->id, $testuser->id, "Parsed tradition with userstore points to correct user" );
166
bbd064a9 167
e867486f 168=end testing
32014ec9 169
170=cut
144d845b 171use Data::Dump;
32014ec9 172sub parse {
dfc37e38 173 my( $tradition, $opts ) = @_;
2626f709 174
175 # Collation data is in the first graph; relationship-specific stuff
176 # is in the second.
177 my( $graph_data, $rel_data ) = graphml_parse( $opts );
144d845b 178
32014ec9 179 my $collation = $tradition->collation;
180 my %witnesses;
e309421a 181
0068967c 182 # print STDERR "Setting graph globals\n";
e3196b2a 183 $tradition->name( $graph_data->{'name'} );
144d845b 184
2626f709 185 my $use_version;
bbd064a9 186 my $tmeta = $tradition->meta;
187 my $cmeta = $collation->meta;
255875b8 188 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
189 my $val = $graph_data->{'global'}->{$gkey};
190 if( $gkey eq 'version' ) {
191 $use_version = $val;
9fef629b 192 } elsif( $gkey eq 'stemmata' ) {
193 # Parse the stemmata into objects
2a812726 194 foreach my $dotstr ( split( /\n/, $val ) ) {
195 $tradition->add_stemma( 'dot' => $dotstr );
196 }
9fef629b 197 } elsif( $gkey eq 'user' ) {
198 # Assign the tradition to the user if we can
199 if( exists $opts->{'userstore'} ) {
200 my $userdir;
201 try {
202 $userdir = Text::Tradition::UserStore->new( $opts->{'userstore'} );
203 } catch {
204 warn( "Could not connect to specified user store; DROPPING user assignment" );
205 }
206 my $user = $userdir->find_user( { username => $val } );
207 if( $user ) {
208 $user->add_tradition( $tradition );
209 } else {
210 warn( "Found no user with ID $val; DROPPING user assignment" );
211 }
212 } else {
213 warn( "DROPPING user assignment without a specified userstore" );
214 }
bbd064a9 215 } elsif( $tmeta->has_attribute( $gkey ) ) {
216 $tradition->$gkey( $val );
255875b8 217 } else {
218 $collation->$gkey( $val );
219 }
220 }
e309421a 221
10e4b1ac 222 # Add the nodes to the graph.
223 # Note any reading IDs that were changed in order to comply with XML
224 # name restrictions; we have to hardcode start & end.
225 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
32014ec9 226
bbd064a9 227 # print STDERR "Adding collation readings\n";
2626f709 228 foreach my $n ( @{$graph_data->{'nodes'}} ) {
0174d6a9 229 # If it is the start or end node, we already have one, so
230 # grab the rank and go.
144d845b 231 if( defined $n->{'is_start'} ) {
f3f26624 232# warn Data::Dump::dump($n);
233# warn $collation->start->id;
144d845b 234 $collation->start->rank($n->{'rank'});
235 next;
236 }
bbd064a9 237 if( defined $n->{'is_end'} ) {
f3f26624 238# warn Data::Dump::dump($n);
bbd064a9 239 $collation->end->rank( $n->{'rank'} );
0174d6a9 240 next;
241 }
bbd064a9 242 my $gnode = $collation->add_reading( $n );
10e4b1ac 243 if( $gnode->id ne $n->{'id'} ) {
244 $namechange{$n->{'id'}} = $gnode->id;
245 }
32014ec9 246 }
910a0a6d 247
32014ec9 248 # Now add the edges.
bbd064a9 249 # print STDERR "Adding collation path edges\n";
32014ec9 250 foreach my $e ( @{$graph_data->{'edges'}} ) {
10e4b1ac 251 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
252 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
253 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
254 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
255 my $from = $collation->reading( $sourceid );
256 my $to = $collation->reading( $targetid );
bbd064a9 257
258 warn "No witness label on path edge!" unless $e->{'witness'};
259 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
260 $collation->add_path( $from, $to, $label );
261
2626f709 262 # Add the witness if we don't have it already.
bbd064a9 263 unless( $witnesses{$e->{'witness'}} ) {
82fa4d57 264 $tradition->add_witness(
265 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
bbd064a9 266 $witnesses{$e->{'witness'}} = 1;
255875b8 267 }
bbd064a9 268 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
32014ec9 269 }
2626f709 270
271 ## Done with the main graph, now look at the relationships.
272 # Nodes are added via the call to add_reading above. We only need
273 # add the relationships themselves.
274 # TODO check that scoping does trt
bf6e338d 275 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
276 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
10e4b1ac 277 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
278 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
279 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
280 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
281 my $from = $collation->reading( $sourceid );
282 my $to = $collation->reading( $targetid );
bbd064a9 283 delete $e->{'source'};
284 delete $e->{'target'};
285 # The remaining keys are relationship attributes.
286 # Backward compatibility...
287 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
288 delete $e->{'class'};
289 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
290 }
291 # Add the specified relationship unless we already have done.
fdfa59a7 292 my $rel_exists;
bbd064a9 293 if( $e->{'scope'} ne 'local' ) {
294 my $relobj = $collation->get_relationship( $from, $to );
295 if( $relobj && $relobj->scope eq $e->{'scope'}
296 && $relobj->type eq $e->{'type'} ) {
fdfa59a7 297 $rel_exists = 1;
298 }
299 }
00c5bf0b 300 try {
301 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
302 } catch( Text::Tradition::Error $e ) {
303 warn "DROPPING $from -> $to: " . $e->message;
304 }
2626f709 305 }
861c3e27 306
307 # Save the text for each witness so that we can ensure consistency
308 # later on
bbd064a9 309 $collation->text_from_paths();
32014ec9 310}
311
bf6e338d 312## Return the relationship that comes first in priority.
313my %LAYERS = (
314 'collated' => 1,
315 'orthographic' => 2,
316 'spelling' => 3,
317 );
318
319sub _layersort_rel {
320 my( $a, $b ) = @_;
321 my $key = exists $a->{'type'} ? 'type' : 'relationship';
322 my $at = $LAYERS{$a->{$key}} || 99;
323 my $bt = $LAYERS{$b->{$key}} || 99;
324 return $at <=> $bt;
325}
326
e867486f 3271;
328
329=head1 BUGS / TODO
330
331=over
332
333=item * Make this into a stream parser with GraphML
334
335=item * Simply field -> attribute correspondence for nodes and edges
336
337=item * Share key name constants with Collation.pm
338
32014ec9 339=back
340
341=head1 LICENSE
342
343This package is free software and is provided "as is" without express
344or implied warranty. You can redistribute it and/or modify it under
345the same terms as Perl itself.
346
347=head1 AUTHOR
348
e867486f 349Tara L Andrews E<lt>aurum@cpan.orgE<gt>