calculate common readings when we parse
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
1f7aa795 5use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
32014ec9 6
7=head1 NAME
8
9Text::Tradition::Parser::GraphML
10
e867486f 11=head1 SYNOPSIS
12
13 use Text::Tradition;
14
15 my $t_from_file = Text::Tradition->new(
16 'name' => 'my text',
17 'input' => 'Self',
18 'file' => '/path/to/tradition.xml'
19 );
20
21 my $t_from_string = Text::Tradition->new(
22 'name' => 'my text',
23 'input' => 'Self',
24 'string' => $tradition_xml,
25 );
26
32014ec9 27=head1 DESCRIPTION
28
29Parser module for Text::Tradition to read in its own GraphML output format.
e867486f 30GraphML is a relatively simple graph description language; a 'graph' element
31can have 'node' and 'edge' elements, and each of these can have simple 'data'
32elements for attributes to be saved.
32014ec9 33
e867486f 34The graph itself has attributes as in the Collation object:
35
36=over
37
38=item * linear
39
40=item * ac_label
41
42=item * baselabel
43
44=item * wit_list_separator
45
46=back
47
48The node objects have the following attributes:
32014ec9 49
50=over
51
e867486f 52=item * name
53
54=item * reading
55
56=item * identical
57
58=item * rank
59
60=item * class
61
62=back
63
64The edge objects have the following attributes:
65
66=over
67
68=item * class
69
70=item * witness (for 'path' class edges)
71
72=item * extra (for 'path' class edges)
73
74=item * relationship (for 'relationship' class edges)
75
76=item * equal_rank (for 'relationship' class edges)
32014ec9 77
e867486f 78=item * non_correctable (for 'relationship' class edges)
32014ec9 79
e867486f 80=item * non_independent (for 'relationship' class edges)
81
82=back
83
84=head1 METHODS
85
86=head2 B<parse>
87
88parse( $graph, $opts );
89
90Takes an initialized Text::Tradition object and a set of options; creates
91the appropriate nodes and edges on the graph. The options hash should
92include either a 'file' argument or a 'string' argument, depending on the
93source of the XML to be parsed.
94
95=begin testing
96
97use Text::Tradition;
98binmode STDOUT, ":utf8";
99binmode STDERR, ":utf8";
100eval { no warnings; binmode $DB::OUT, ":utf8"; };
101
102my $tradition = 't/data/florilegium_graphml.xml';
103my $t = Text::Tradition->new(
104 'name' => 'inline',
105 'input' => 'Self',
106 'file' => $tradition,
107 );
108
109is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
110if( $t ) {
111 is( scalar $t->collation->readings, 319, "Collation has all readings" );
255875b8 112 is( scalar $t->collation->paths, 376, "Collation has all paths" );
e867486f 113 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
114}
115
116=end testing
32014ec9 117
118=cut
119
2626f709 120my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY,
15db7774 121 $START_KEY, $END_KEY, $LACUNA_KEY, $COMMON_KEY,
255875b8 122 $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY,
2626f709 123 $SCOPE_KEY, $CORRECT_KEY, $INDEP_KEY )
124 = qw/ id text identical rank
15db7774 125 is_start is_end is_lacuna is_common
255875b8 126 source target witness extra relationship
2626f709 127 scope non_correctable non_independent /;
32014ec9 128
129sub parse {
dfc37e38 130 my( $tradition, $opts ) = @_;
2626f709 131
132 # Collation data is in the first graph; relationship-specific stuff
133 # is in the second.
134 my( $graph_data, $rel_data ) = graphml_parse( $opts );
94c00c71 135
32014ec9 136 my $collation = $tradition->collation;
137 my %witnesses;
e309421a 138
0068967c 139 # print STDERR "Setting graph globals\n";
e3196b2a 140 $tradition->name( $graph_data->{'name'} );
2626f709 141 my $use_version;
255875b8 142 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
143 my $val = $graph_data->{'global'}->{$gkey};
144 if( $gkey eq 'version' ) {
145 $use_version = $val;
146 } else {
147 $collation->$gkey( $val );
148 }
149 }
e309421a 150
32014ec9 151 # Add the nodes to the graph.
152
0068967c 153 # print STDERR "Adding graph nodes\n";
2626f709 154 foreach my $n ( @{$graph_data->{'nodes'}} ) {
0174d6a9 155 # If it is the start or end node, we already have one, so
156 # grab the rank and go.
0068967c 157 next if( defined $n->{$START_KEY} );
0174d6a9 158 if( defined $n->{$END_KEY} ) {
159 $collation->end->rank( $n->{$RANK_KEY} );
160 next;
161 }
255875b8 162
94c00c71 163 # First extract the data that we can use without reference to
164 # anything else.
94c00c71 165
255875b8 166 # Create the node.
167 my $reading_options = {
2626f709 168 'id' => $n->{$IDKEY},
169 'is_lacuna' => $n->{$LACUNA_KEY},
15db7774 170 'is_common' => $n->{$COMMON_KEY},
255875b8 171 };
2626f709 172 my $rank = $n->{$RANK_KEY};
255875b8 173 $reading_options->{'rank'} = $rank if $rank;
2626f709 174 my $text = $n->{$TOKENKEY};
255875b8 175 $reading_options->{'text'} = $text if $text;
176
255875b8 177 my $gnode = $collation->add_reading( $reading_options );
32014ec9 178 }
910a0a6d 179
32014ec9 180 # Now add the edges.
0068967c 181 # print STDERR "Adding graph edges\n";
32014ec9 182 foreach my $e ( @{$graph_data->{'edges'}} ) {
94c00c71 183 my $from = $e->{$SOURCE_KEY};
184 my $to = $e->{$TARGET_KEY};
32014ec9 185
2626f709 186 # We need the witness, and whether it is an 'extra' reading path.
187 my $wit = $e->{$WITNESS_KEY};
188 warn "No witness label on path edge!" unless $wit;
189 my $extra = $e->{$EXTRA_KEY};
190 my $label = $wit . ( $extra ? $collation->ac_label : '' );
191 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
192 # Add the witness if we don't have it already.
193 unless( $witnesses{$wit} ) {
194 $tradition->add_witness( sigil => $wit );
195 $witnesses{$wit} = 1;
255875b8 196 }
2626f709 197 $tradition->witness( $wit )->is_layered( 1 ) if $extra;
32014ec9 198 }
2626f709 199
200 ## Done with the main graph, now look at the relationships.
201 # Nodes are added via the call to add_reading above. We only need
202 # add the relationships themselves.
203 # TODO check that scoping does trt
204 foreach my $e ( @{$rel_data->{'edges'}} ) {
205 my $from = $e->{$SOURCE_KEY};
206 my $to = $e->{$TARGET_KEY};
207 my $relationship_opts = {
208 'type' => $e->{$RELATIONSHIP_KEY},
209 'scope' => $e->{$SCOPE_KEY},
210 };
211 $relationship_opts->{'non_correctable'} = $e->{$CORRECT_KEY}
212 if exists $e->{$CORRECT_KEY};
213 $relationship_opts->{'non_independent'} = $e->{$INDEP_KEY}
214 if exists $e->{$INDEP_KEY};
215 $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY},
216 $relationship_opts );
217 }
861c3e27 218
219 # Save the text for each witness so that we can ensure consistency
220 # later on
221 $tradition->collation->text_from_paths();
222
32014ec9 223}
224
e867486f 2251;
226
227=head1 BUGS / TODO
228
229=over
230
231=item * Make this into a stream parser with GraphML
232
233=item * Simply field -> attribute correspondence for nodes and edges
234
235=item * Share key name constants with Collation.pm
236
32014ec9 237=back
238
239=head1 LICENSE
240
241This package is free software and is provided "as is" without express
242or implied warranty. You can redistribute it and/or modify it under
243the same terms as Perl itself.
244
245=head1 AUTHOR
246
e867486f 247Tara L Andrews E<lt>aurum@cpan.orgE<gt>