load extensions statically to avoid bad object wrapping interactions
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
1f7aa795 5use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
00c5bf0b 6use TryCatch;
32014ec9 7
8=head1 NAME
9
10Text::Tradition::Parser::GraphML
11
e867486f 12=head1 SYNOPSIS
13
14 use Text::Tradition;
15
16 my $t_from_file = Text::Tradition->new(
17 'name' => 'my text',
18 'input' => 'Self',
19 'file' => '/path/to/tradition.xml'
20 );
21
22 my $t_from_string = Text::Tradition->new(
23 'name' => 'my text',
24 'input' => 'Self',
25 'string' => $tradition_xml,
26 );
27
32014ec9 28=head1 DESCRIPTION
29
30Parser module for Text::Tradition to read in its own GraphML output format.
e867486f 31GraphML is a relatively simple graph description language; a 'graph' element
32can have 'node' and 'edge' elements, and each of these can have simple 'data'
33elements for attributes to be saved.
32014ec9 34
e867486f 35The graph itself has attributes as in the Collation object:
36
37=over
38
39=item * linear
40
41=item * ac_label
42
43=item * baselabel
44
45=item * wit_list_separator
46
47=back
48
49The node objects have the following attributes:
32014ec9 50
51=over
52
e867486f 53=item * name
54
55=item * reading
56
57=item * identical
58
59=item * rank
60
61=item * class
62
63=back
64
65The edge objects have the following attributes:
66
67=over
68
69=item * class
70
71=item * witness (for 'path' class edges)
72
73=item * extra (for 'path' class edges)
74
75=item * relationship (for 'relationship' class edges)
76
77=item * equal_rank (for 'relationship' class edges)
32014ec9 78
e867486f 79=item * non_correctable (for 'relationship' class edges)
32014ec9 80
e867486f 81=item * non_independent (for 'relationship' class edges)
82
83=back
84
85=head1 METHODS
86
87=head2 B<parse>
88
89parse( $graph, $opts );
90
91Takes an initialized Text::Tradition object and a set of options; creates
92the appropriate nodes and edges on the graph. The options hash should
93include either a 'file' argument or a 'string' argument, depending on the
94source of the XML to be parsed.
95
96=begin testing
97
9fef629b 98use File::Temp;
951ddfe8 99use Safe::Isa;
9fef629b 100use Test::Warn;
e867486f 101use Text::Tradition;
a445ce40 102use Text::Tradition::Directory;
951ddfe8 103use TryCatch;
e867486f 104binmode STDOUT, ":utf8";
105binmode STDERR, ":utf8";
106eval { no warnings; binmode $DB::OUT, ":utf8"; };
107
108my $tradition = 't/data/florilegium_graphml.xml';
109my $t = Text::Tradition->new(
110 'name' => 'inline',
111 'input' => 'Self',
112 'file' => $tradition,
113 );
114
951ddfe8 115ok( $t->$_isa('Text::Tradition'), "Parsed GraphML version 2" );
e867486f 116if( $t ) {
117 is( scalar $t->collation->readings, 319, "Collation has all readings" );
255875b8 118 is( scalar $t->collation->paths, 376, "Collation has all paths" );
e867486f 119 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
120}
121
2a812726 122# TODO add a relationship, add a stemma, write graphml, reparse it, check that
123# the new data is there
bbd064a9 124$t->language('Greek');
37bf09f4 125my $stemma_enabled = $t->can('add_stemma');
951ddfe8 126if( $stemma_enabled ) {
127 $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
128}
bbd064a9 129$t->collation->add_relationship( 'w12', 'w13',
130 { 'type' => 'grammatical', 'scope' => 'global',
131 'annotation' => 'This is some note' } );
132ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
133my $graphml_str = $t->collation->as_graphml;
134
135my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
951ddfe8 136ok( $newt->$_isa('Text::Tradition'), "Parsed current GraphML version" );
bbd064a9 137if( $newt ) {
138 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
139 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
140 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
141 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
142 is( $newt->language, 'Greek', "Tradition has correct language setting" );
143 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
144 ok( $rel, "Found set relationship" );
145 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
951ddfe8 146 if( $stemma_enabled ) {
147 is( scalar $newt->stemmata, 1, "Tradition has its stemma" );
148 is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" );
149 }
bbd064a9 150}
151
9fef629b 152# Test user save / restore
153my $fh = File::Temp->new();
154my $file = $fh->filename;
155$fh->close;
156my $dsn = "dbi:SQLite:dbname=$file";
1df4baa9 157my $userstore = Text::Tradition::Directory->new( { dsn => $dsn,
9fef629b 158 extra_args => { create => 1 } } );
159my $scope = $userstore->new_scope();
1df4baa9 160my $testuser = $userstore->create_user( { url => 'http://example.com' } );
951ddfe8 161ok( $testuser->$_isa('Text::Tradition::User'), "Created test user via userstore" );
9fef629b 162$testuser->add_tradition( $newt );
163is( $newt->user->id, $testuser->id, "Assigned tradition to test user" );
164$graphml_str = $newt->collation->as_graphml;
165my $usert;
166warning_is {
167 $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
168} 'DROPPING user assignment without a specified userstore',
169 "Got expected user drop warning on parse";
170$usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str,
1df4baa9 171 'userstore' => $userstore );
9fef629b 172is( $usert->user->id, $testuser->id, "Parsed tradition with userstore points to correct user" );
173
951ddfe8 174# Test warning if we can
175unless( $stemma_enabled ) {
176 my $nst;
177 warnings_exist {
178 $nst = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lexformat.xml' );
179 } [qr/DROPPING stemmata/],
180 "Got expected stemma drop warning on parse";
181}
bbd064a9 182
e867486f 183=end testing
32014ec9 184
185=cut
144d845b 186use Data::Dump;
32014ec9 187sub parse {
dfc37e38 188 my( $tradition, $opts ) = @_;
2626f709 189
190 # Collation data is in the first graph; relationship-specific stuff
191 # is in the second.
192 my( $graph_data, $rel_data ) = graphml_parse( $opts );
144d845b 193
32014ec9 194 my $collation = $tradition->collation;
195 my %witnesses;
e309421a 196
0068967c 197 # print STDERR "Setting graph globals\n";
e3196b2a 198 $tradition->name( $graph_data->{'name'} );
144d845b 199
2626f709 200 my $use_version;
bbd064a9 201 my $tmeta = $tradition->meta;
202 my $cmeta = $collation->meta;
255875b8 203 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
204 my $val = $graph_data->{'global'}->{$gkey};
205 if( $gkey eq 'version' ) {
206 $use_version = $val;
9fef629b 207 } elsif( $gkey eq 'stemmata' ) {
951ddfe8 208 # Make sure we can handle stemmata
9fef629b 209 # Parse the stemmata into objects
37bf09f4 210 if( $tradition->can('add_stemma') ) {
951ddfe8 211 foreach my $dotstr ( split( /\n/, $val ) ) {
212 $tradition->add_stemma( 'dot' => $dotstr );
213 }
37bf09f4 214 } else {
215 warn "Analysis module not installed; DROPPING stemmata";
2a812726 216 }
9fef629b 217 } elsif( $gkey eq 'user' ) {
218 # Assign the tradition to the user if we can
219 if( exists $opts->{'userstore'} ) {
1df4baa9 220 my $userdir = delete $opts->{'userstore'};
9fef629b 221 my $user = $userdir->find_user( { username => $val } );
222 if( $user ) {
223 $user->add_tradition( $tradition );
224 } else {
225 warn( "Found no user with ID $val; DROPPING user assignment" );
226 }
227 } else {
228 warn( "DROPPING user assignment without a specified userstore" );
229 }
bbd064a9 230 } elsif( $tmeta->has_attribute( $gkey ) ) {
231 $tradition->$gkey( $val );
255875b8 232 } else {
233 $collation->$gkey( $val );
234 }
235 }
e309421a 236
10e4b1ac 237 # Add the nodes to the graph.
238 # Note any reading IDs that were changed in order to comply with XML
239 # name restrictions; we have to hardcode start & end.
240 my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' );
32014ec9 241
bbd064a9 242 # print STDERR "Adding collation readings\n";
2626f709 243 foreach my $n ( @{$graph_data->{'nodes'}} ) {
0174d6a9 244 # If it is the start or end node, we already have one, so
245 # grab the rank and go.
144d845b 246 if( defined $n->{'is_start'} ) {
a445ce40 247 $collation->start->rank($n->{'rank'});
248 next;
144d845b 249 }
bbd064a9 250 if( defined $n->{'is_end'} ) {
251 $collation->end->rank( $n->{'rank'} );
0174d6a9 252 next;
253 }
bbd064a9 254 my $gnode = $collation->add_reading( $n );
10e4b1ac 255 if( $gnode->id ne $n->{'id'} ) {
256 $namechange{$n->{'id'}} = $gnode->id;
257 }
32014ec9 258 }
910a0a6d 259
32014ec9 260 # Now add the edges.
bbd064a9 261 # print STDERR "Adding collation path edges\n";
32014ec9 262 foreach my $e ( @{$graph_data->{'edges'}} ) {
10e4b1ac 263 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
264 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
265 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
266 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
267 my $from = $collation->reading( $sourceid );
268 my $to = $collation->reading( $targetid );
bbd064a9 269
270 warn "No witness label on path edge!" unless $e->{'witness'};
271 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
272 $collation->add_path( $from, $to, $label );
273
2626f709 274 # Add the witness if we don't have it already.
bbd064a9 275 unless( $witnesses{$e->{'witness'}} ) {
82fa4d57 276 $tradition->add_witness(
277 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
bbd064a9 278 $witnesses{$e->{'witness'}} = 1;
255875b8 279 }
bbd064a9 280 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
32014ec9 281 }
2626f709 282
283 ## Done with the main graph, now look at the relationships.
284 # Nodes are added via the call to add_reading above. We only need
285 # add the relationships themselves.
286 # TODO check that scoping does trt
bf6e338d 287 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
288 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
10e4b1ac 289 my $sourceid = exists $namechange{$e->{'source'}->{'id'}}
290 ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'};
291 my $targetid = exists $namechange{$e->{'target'}->{'id'}}
292 ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'};
293 my $from = $collation->reading( $sourceid );
294 my $to = $collation->reading( $targetid );
bbd064a9 295 delete $e->{'source'};
296 delete $e->{'target'};
297 # The remaining keys are relationship attributes.
298 # Backward compatibility...
299 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
300 delete $e->{'class'};
301 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
302 }
303 # Add the specified relationship unless we already have done.
fdfa59a7 304 my $rel_exists;
bbd064a9 305 if( $e->{'scope'} ne 'local' ) {
306 my $relobj = $collation->get_relationship( $from, $to );
307 if( $relobj && $relobj->scope eq $e->{'scope'}
308 && $relobj->type eq $e->{'type'} ) {
fdfa59a7 309 $rel_exists = 1;
310 }
311 }
00c5bf0b 312 try {
313 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
314 } catch( Text::Tradition::Error $e ) {
315 warn "DROPPING $from -> $to: " . $e->message;
316 }
2626f709 317 }
861c3e27 318
319 # Save the text for each witness so that we can ensure consistency
320 # later on
bbd064a9 321 $collation->text_from_paths();
32014ec9 322}
323
bf6e338d 324## Return the relationship that comes first in priority.
325my %LAYERS = (
326 'collated' => 1,
327 'orthographic' => 2,
328 'spelling' => 3,
329 );
330
331sub _layersort_rel {
332 my( $a, $b ) = @_;
333 my $key = exists $a->{'type'} ? 'type' : 'relationship';
334 my $at = $LAYERS{$a->{$key}} || 99;
335 my $bt = $LAYERS{$b->{$key}} || 99;
336 return $at <=> $bt;
337}
338
e867486f 3391;
340
341=head1 BUGS / TODO
342
343=over
344
345=item * Make this into a stream parser with GraphML
346
347=item * Simply field -> attribute correspondence for nodes and edges
348
349=item * Share key name constants with Collation.pm
350
32014ec9 351=back
352
353=head1 LICENSE
354
355This package is free software and is provided "as is" without express
356or implied warranty. You can redistribute it and/or modify it under
357the same terms as Perl itself.
358
359=head1 AUTHOR
360
e867486f 361Tara L Andrews E<lt>aurum@cpan.orgE<gt>