avoid dying on relationship conflicts
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
1f7aa795 5use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
00c5bf0b 6use TryCatch;
32014ec9 7
8=head1 NAME
9
10Text::Tradition::Parser::GraphML
11
e867486f 12=head1 SYNOPSIS
13
14 use Text::Tradition;
15
16 my $t_from_file = Text::Tradition->new(
17 'name' => 'my text',
18 'input' => 'Self',
19 'file' => '/path/to/tradition.xml'
20 );
21
22 my $t_from_string = Text::Tradition->new(
23 'name' => 'my text',
24 'input' => 'Self',
25 'string' => $tradition_xml,
26 );
27
32014ec9 28=head1 DESCRIPTION
29
30Parser module for Text::Tradition to read in its own GraphML output format.
e867486f 31GraphML is a relatively simple graph description language; a 'graph' element
32can have 'node' and 'edge' elements, and each of these can have simple 'data'
33elements for attributes to be saved.
32014ec9 34
e867486f 35The graph itself has attributes as in the Collation object:
36
37=over
38
39=item * linear
40
41=item * ac_label
42
43=item * baselabel
44
45=item * wit_list_separator
46
47=back
48
49The node objects have the following attributes:
32014ec9 50
51=over
52
e867486f 53=item * name
54
55=item * reading
56
57=item * identical
58
59=item * rank
60
61=item * class
62
63=back
64
65The edge objects have the following attributes:
66
67=over
68
69=item * class
70
71=item * witness (for 'path' class edges)
72
73=item * extra (for 'path' class edges)
74
75=item * relationship (for 'relationship' class edges)
76
77=item * equal_rank (for 'relationship' class edges)
32014ec9 78
e867486f 79=item * non_correctable (for 'relationship' class edges)
32014ec9 80
e867486f 81=item * non_independent (for 'relationship' class edges)
82
83=back
84
85=head1 METHODS
86
87=head2 B<parse>
88
89parse( $graph, $opts );
90
91Takes an initialized Text::Tradition object and a set of options; creates
92the appropriate nodes and edges on the graph. The options hash should
93include either a 'file' argument or a 'string' argument, depending on the
94source of the XML to be parsed.
95
96=begin testing
97
98use Text::Tradition;
99binmode STDOUT, ":utf8";
100binmode STDERR, ":utf8";
101eval { no warnings; binmode $DB::OUT, ":utf8"; };
102
103my $tradition = 't/data/florilegium_graphml.xml';
104my $t = Text::Tradition->new(
105 'name' => 'inline',
106 'input' => 'Self',
107 'file' => $tradition,
108 );
109
bbd064a9 110is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" );
e867486f 111if( $t ) {
112 is( scalar $t->collation->readings, 319, "Collation has all readings" );
255875b8 113 is( scalar $t->collation->paths, 376, "Collation has all paths" );
e867486f 114 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
115}
116
bbd064a9 117# TODO add a relationship, write graphml, reparse it, check that the rel
118# is still there
119$t->language('Greek');
120$t->collation->add_relationship( 'w12', 'w13',
121 { 'type' => 'grammatical', 'scope' => 'global',
122 'annotation' => 'This is some note' } );
123ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
124my $graphml_str = $t->collation->as_graphml;
125
126my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
127is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" );
128if( $newt ) {
129 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
130 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
131 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
132 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
133 is( $newt->language, 'Greek', "Tradition has correct language setting" );
134 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
135 ok( $rel, "Found set relationship" );
136 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
137}
138
139
e867486f 140=end testing
32014ec9 141
142=cut
143
32014ec9 144sub parse {
dfc37e38 145 my( $tradition, $opts ) = @_;
2626f709 146
147 # Collation data is in the first graph; relationship-specific stuff
148 # is in the second.
149 my( $graph_data, $rel_data ) = graphml_parse( $opts );
94c00c71 150
32014ec9 151 my $collation = $tradition->collation;
152 my %witnesses;
e309421a 153
0068967c 154 # print STDERR "Setting graph globals\n";
e3196b2a 155 $tradition->name( $graph_data->{'name'} );
2626f709 156 my $use_version;
bbd064a9 157 my $tmeta = $tradition->meta;
158 my $cmeta = $collation->meta;
255875b8 159 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
160 my $val = $graph_data->{'global'}->{$gkey};
161 if( $gkey eq 'version' ) {
162 $use_version = $val;
bbd064a9 163 } elsif( $tmeta->has_attribute( $gkey ) ) {
164 $tradition->$gkey( $val );
255875b8 165 } else {
166 $collation->$gkey( $val );
167 }
168 }
e309421a 169
32014ec9 170 # Add the nodes to the graph.
171
bbd064a9 172 # print STDERR "Adding collation readings\n";
2626f709 173 foreach my $n ( @{$graph_data->{'nodes'}} ) {
0174d6a9 174 # If it is the start or end node, we already have one, so
175 # grab the rank and go.
bbd064a9 176 next if( defined $n->{'is_start'} );
177 if( defined $n->{'is_end'} ) {
178 $collation->end->rank( $n->{'rank'} );
0174d6a9 179 next;
180 }
bbd064a9 181 my $gnode = $collation->add_reading( $n );
32014ec9 182 }
910a0a6d 183
32014ec9 184 # Now add the edges.
bbd064a9 185 # print STDERR "Adding collation path edges\n";
32014ec9 186 foreach my $e ( @{$graph_data->{'edges'}} ) {
bbd064a9 187 my $from = $collation->reading( $e->{'source'}->{'id'} );
188 my $to = $collation->reading( $e->{'target'}->{'id'} );
189
190 warn "No witness label on path edge!" unless $e->{'witness'};
191 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
192 $collation->add_path( $from, $to, $label );
193
2626f709 194 # Add the witness if we don't have it already.
bbd064a9 195 unless( $witnesses{$e->{'witness'}} ) {
82fa4d57 196 $tradition->add_witness(
197 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
bbd064a9 198 $witnesses{$e->{'witness'}} = 1;
255875b8 199 }
bbd064a9 200 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
32014ec9 201 }
2626f709 202
203 ## Done with the main graph, now look at the relationships.
204 # Nodes are added via the call to add_reading above. We only need
205 # add the relationships themselves.
206 # TODO check that scoping does trt
bf6e338d 207 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
208 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
bbd064a9 209 my $from = $collation->reading( $e->{'source'}->{'id'} );
210 my $to = $collation->reading( $e->{'target'}->{'id'} );
211 delete $e->{'source'};
212 delete $e->{'target'};
213 # The remaining keys are relationship attributes.
214 # Backward compatibility...
215 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
216 delete $e->{'class'};
217 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
218 }
219 # Add the specified relationship unless we already have done.
fdfa59a7 220 my $rel_exists;
bbd064a9 221 if( $e->{'scope'} ne 'local' ) {
222 my $relobj = $collation->get_relationship( $from, $to );
223 if( $relobj && $relobj->scope eq $e->{'scope'}
224 && $relobj->type eq $e->{'type'} ) {
fdfa59a7 225 $rel_exists = 1;
226 }
227 }
00c5bf0b 228 try {
229 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
230 } catch( Text::Tradition::Error $e ) {
231 warn "DROPPING $from -> $to: " . $e->message;
232 }
2626f709 233 }
861c3e27 234
235 # Save the text for each witness so that we can ensure consistency
236 # later on
bbd064a9 237 $collation->text_from_paths();
32014ec9 238}
239
bf6e338d 240## Return the relationship that comes first in priority.
241my %LAYERS = (
242 'collated' => 1,
243 'orthographic' => 2,
244 'spelling' => 3,
245 );
246
247sub _layersort_rel {
248 my( $a, $b ) = @_;
249 my $key = exists $a->{'type'} ? 'type' : 'relationship';
250 my $at = $LAYERS{$a->{$key}} || 99;
251 my $bt = $LAYERS{$b->{$key}} || 99;
252 return $at <=> $bt;
253}
254
e867486f 2551;
256
257=head1 BUGS / TODO
258
259=over
260
261=item * Make this into a stream parser with GraphML
262
263=item * Simply field -> attribute correspondence for nodes and edges
264
265=item * Share key name constants with Collation.pm
266
32014ec9 267=back
268
269=head1 LICENSE
270
271This package is free software and is provided "as is" without express
272or implied warranty. You can redistribute it and/or modify it under
273the same terms as Perl itself.
274
275=head1 AUTHOR
276
e867486f 277Tara L Andrews E<lt>aurum@cpan.orgE<gt>