make the rest of the tests work with the new Witness
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
1f7aa795 5use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
32014ec9 6
7=head1 NAME
8
9Text::Tradition::Parser::GraphML
10
e867486f 11=head1 SYNOPSIS
12
13 use Text::Tradition;
14
15 my $t_from_file = Text::Tradition->new(
16 'name' => 'my text',
17 'input' => 'Self',
18 'file' => '/path/to/tradition.xml'
19 );
20
21 my $t_from_string = Text::Tradition->new(
22 'name' => 'my text',
23 'input' => 'Self',
24 'string' => $tradition_xml,
25 );
26
32014ec9 27=head1 DESCRIPTION
28
29Parser module for Text::Tradition to read in its own GraphML output format.
e867486f 30GraphML is a relatively simple graph description language; a 'graph' element
31can have 'node' and 'edge' elements, and each of these can have simple 'data'
32elements for attributes to be saved.
32014ec9 33
e867486f 34The graph itself has attributes as in the Collation object:
35
36=over
37
38=item * linear
39
40=item * ac_label
41
42=item * baselabel
43
44=item * wit_list_separator
45
46=back
47
48The node objects have the following attributes:
32014ec9 49
50=over
51
e867486f 52=item * name
53
54=item * reading
55
56=item * identical
57
58=item * rank
59
60=item * class
61
62=back
63
64The edge objects have the following attributes:
65
66=over
67
68=item * class
69
70=item * witness (for 'path' class edges)
71
72=item * extra (for 'path' class edges)
73
74=item * relationship (for 'relationship' class edges)
75
76=item * equal_rank (for 'relationship' class edges)
32014ec9 77
e867486f 78=item * non_correctable (for 'relationship' class edges)
32014ec9 79
e867486f 80=item * non_independent (for 'relationship' class edges)
81
82=back
83
84=head1 METHODS
85
86=head2 B<parse>
87
88parse( $graph, $opts );
89
90Takes an initialized Text::Tradition object and a set of options; creates
91the appropriate nodes and edges on the graph. The options hash should
92include either a 'file' argument or a 'string' argument, depending on the
93source of the XML to be parsed.
94
95=begin testing
96
97use Text::Tradition;
98binmode STDOUT, ":utf8";
99binmode STDERR, ":utf8";
100eval { no warnings; binmode $DB::OUT, ":utf8"; };
101
102my $tradition = 't/data/florilegium_graphml.xml';
103my $t = Text::Tradition->new(
104 'name' => 'inline',
105 'input' => 'Self',
106 'file' => $tradition,
107 );
108
bbd064a9 109is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" );
e867486f 110if( $t ) {
111 is( scalar $t->collation->readings, 319, "Collation has all readings" );
255875b8 112 is( scalar $t->collation->paths, 376, "Collation has all paths" );
e867486f 113 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
114}
115
bbd064a9 116# TODO add a relationship, write graphml, reparse it, check that the rel
117# is still there
118$t->language('Greek');
119$t->collation->add_relationship( 'w12', 'w13',
120 { 'type' => 'grammatical', 'scope' => 'global',
121 'annotation' => 'This is some note' } );
122ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" );
123my $graphml_str = $t->collation->as_graphml;
124
125my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str );
126is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" );
127if( $newt ) {
128 is( scalar $newt->collation->readings, 319, "Collation has all readings" );
129 is( scalar $newt->collation->paths, 376, "Collation has all paths" );
130 is( scalar $newt->witnesses, 13, "Collation has all witnesses" );
131 is( scalar $newt->collation->relationships, 1, "Collation has added relationship" );
132 is( $newt->language, 'Greek', "Tradition has correct language setting" );
133 my $rel = $newt->collation->get_relationship( 'w12', 'w13' );
134 ok( $rel, "Found set relationship" );
135 is( $rel->annotation, 'This is some note', "Relationship has its properties" );
136}
137
138
e867486f 139=end testing
32014ec9 140
141=cut
142
32014ec9 143sub parse {
dfc37e38 144 my( $tradition, $opts ) = @_;
2626f709 145
146 # Collation data is in the first graph; relationship-specific stuff
147 # is in the second.
148 my( $graph_data, $rel_data ) = graphml_parse( $opts );
94c00c71 149
32014ec9 150 my $collation = $tradition->collation;
151 my %witnesses;
e309421a 152
0068967c 153 # print STDERR "Setting graph globals\n";
e3196b2a 154 $tradition->name( $graph_data->{'name'} );
2626f709 155 my $use_version;
bbd064a9 156 my $tmeta = $tradition->meta;
157 my $cmeta = $collation->meta;
255875b8 158 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
159 my $val = $graph_data->{'global'}->{$gkey};
160 if( $gkey eq 'version' ) {
161 $use_version = $val;
bbd064a9 162 } elsif( $tmeta->has_attribute( $gkey ) ) {
163 $tradition->$gkey( $val );
255875b8 164 } else {
165 $collation->$gkey( $val );
166 }
167 }
e309421a 168
32014ec9 169 # Add the nodes to the graph.
170
bbd064a9 171 # print STDERR "Adding collation readings\n";
2626f709 172 foreach my $n ( @{$graph_data->{'nodes'}} ) {
0174d6a9 173 # If it is the start or end node, we already have one, so
174 # grab the rank and go.
bbd064a9 175 next if( defined $n->{'is_start'} );
176 if( defined $n->{'is_end'} ) {
177 $collation->end->rank( $n->{'rank'} );
0174d6a9 178 next;
179 }
bbd064a9 180 my $gnode = $collation->add_reading( $n );
32014ec9 181 }
910a0a6d 182
32014ec9 183 # Now add the edges.
bbd064a9 184 # print STDERR "Adding collation path edges\n";
32014ec9 185 foreach my $e ( @{$graph_data->{'edges'}} ) {
bbd064a9 186 my $from = $collation->reading( $e->{'source'}->{'id'} );
187 my $to = $collation->reading( $e->{'target'}->{'id'} );
188
189 warn "No witness label on path edge!" unless $e->{'witness'};
190 my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' );
191 $collation->add_path( $from, $to, $label );
192
2626f709 193 # Add the witness if we don't have it already.
bbd064a9 194 unless( $witnesses{$e->{'witness'}} ) {
82fa4d57 195 $tradition->add_witness(
196 sigil => $e->{'witness'}, 'sourcetype' => 'collation' );
bbd064a9 197 $witnesses{$e->{'witness'}} = 1;
255875b8 198 }
bbd064a9 199 $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'};
32014ec9 200 }
2626f709 201
202 ## Done with the main graph, now look at the relationships.
203 # Nodes are added via the call to add_reading above. We only need
204 # add the relationships themselves.
205 # TODO check that scoping does trt
bf6e338d 206 $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels
207 foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) {
bbd064a9 208 my $from = $collation->reading( $e->{'source'}->{'id'} );
209 my $to = $collation->reading( $e->{'target'}->{'id'} );
210 delete $e->{'source'};
211 delete $e->{'target'};
212 # The remaining keys are relationship attributes.
213 # Backward compatibility...
214 if( $use_version eq '2.0' || $use_version eq '3.0' ) {
215 delete $e->{'class'};
216 $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'};
217 }
218 # Add the specified relationship unless we already have done.
fdfa59a7 219 my $rel_exists;
bbd064a9 220 if( $e->{'scope'} ne 'local' ) {
221 my $relobj = $collation->get_relationship( $from, $to );
222 if( $relobj && $relobj->scope eq $e->{'scope'}
223 && $relobj->type eq $e->{'type'} ) {
fdfa59a7 224 $rel_exists = 1;
225 }
226 }
bbd064a9 227 $collation->add_relationship( $from, $to, $e ) unless $rel_exists;
2626f709 228 }
861c3e27 229
230 # Save the text for each witness so that we can ensure consistency
231 # later on
bbd064a9 232 $collation->text_from_paths();
32014ec9 233}
234
bf6e338d 235## Return the relationship that comes first in priority.
236my %LAYERS = (
237 'collated' => 1,
238 'orthographic' => 2,
239 'spelling' => 3,
240 );
241
242sub _layersort_rel {
243 my( $a, $b ) = @_;
244 my $key = exists $a->{'type'} ? 'type' : 'relationship';
245 my $at = $LAYERS{$a->{$key}} || 99;
246 my $bt = $LAYERS{$b->{$key}} || 99;
247 return $at <=> $bt;
248}
249
e867486f 2501;
251
252=head1 BUGS / TODO
253
254=over
255
256=item * Make this into a stream parser with GraphML
257
258=item * Simply field -> attribute correspondence for nodes and edges
259
260=item * Share key name constants with Collation.pm
261
32014ec9 262=back
263
264=head1 LICENSE
265
266This package is free software and is provided "as is" without express
267or implied warranty. You can redistribute it and/or modify it under
268the same terms as Perl itself.
269
270=head1 AUTHOR
271
e867486f 272Tara L Andrews E<lt>aurum@cpan.orgE<gt>