UNFINISHED change to Analysis to incorporate IDP solver
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
1f7aa795 5use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
32014ec9 6
7=head1 NAME
8
9Text::Tradition::Parser::GraphML
10
e867486f 11=head1 SYNOPSIS
12
13 use Text::Tradition;
14
15 my $t_from_file = Text::Tradition->new(
16 'name' => 'my text',
17 'input' => 'Self',
18 'file' => '/path/to/tradition.xml'
19 );
20
21 my $t_from_string = Text::Tradition->new(
22 'name' => 'my text',
23 'input' => 'Self',
24 'string' => $tradition_xml,
25 );
26
32014ec9 27=head1 DESCRIPTION
28
29Parser module for Text::Tradition to read in its own GraphML output format.
e867486f 30GraphML is a relatively simple graph description language; a 'graph' element
31can have 'node' and 'edge' elements, and each of these can have simple 'data'
32elements for attributes to be saved.
32014ec9 33
e867486f 34The graph itself has attributes as in the Collation object:
35
36=over
37
38=item * linear
39
40=item * ac_label
41
42=item * baselabel
43
44=item * wit_list_separator
45
46=back
47
48The node objects have the following attributes:
32014ec9 49
50=over
51
e867486f 52=item * name
53
54=item * reading
55
56=item * identical
57
58=item * rank
59
60=item * class
61
62=back
63
64The edge objects have the following attributes:
65
66=over
67
68=item * class
69
70=item * witness (for 'path' class edges)
71
72=item * extra (for 'path' class edges)
73
74=item * relationship (for 'relationship' class edges)
75
76=item * equal_rank (for 'relationship' class edges)
32014ec9 77
e867486f 78=item * non_correctable (for 'relationship' class edges)
32014ec9 79
e867486f 80=item * non_independent (for 'relationship' class edges)
81
82=back
83
84=head1 METHODS
85
86=head2 B<parse>
87
88parse( $graph, $opts );
89
90Takes an initialized Text::Tradition object and a set of options; creates
91the appropriate nodes and edges on the graph. The options hash should
92include either a 'file' argument or a 'string' argument, depending on the
93source of the XML to be parsed.
94
95=begin testing
96
97use Text::Tradition;
98binmode STDOUT, ":utf8";
99binmode STDERR, ":utf8";
100eval { no warnings; binmode $DB::OUT, ":utf8"; };
101
102my $tradition = 't/data/florilegium_graphml.xml';
103my $t = Text::Tradition->new(
104 'name' => 'inline',
105 'input' => 'Self',
106 'file' => $tradition,
107 );
108
109is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
110if( $t ) {
111 is( scalar $t->collation->readings, 319, "Collation has all readings" );
255875b8 112 is( scalar $t->collation->paths, 376, "Collation has all paths" );
e867486f 113 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
114}
115
116=end testing
32014ec9 117
118=cut
119
2626f709 120my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY,
15db7774 121 $START_KEY, $END_KEY, $LACUNA_KEY, $COMMON_KEY,
255875b8 122 $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY,
fdfa59a7 123 $SCOPE_KEY, $ANNOTATION_KEY, $CORRECT_KEY, $INDEP_KEY )
2626f709 124 = qw/ id text identical rank
15db7774 125 is_start is_end is_lacuna is_common
255875b8 126 source target witness extra relationship
fdfa59a7 127 scope annotation non_correctable non_independent /;
32014ec9 128
129sub parse {
dfc37e38 130 my( $tradition, $opts ) = @_;
2626f709 131
132 # Collation data is in the first graph; relationship-specific stuff
133 # is in the second.
134 my( $graph_data, $rel_data ) = graphml_parse( $opts );
94c00c71 135
32014ec9 136 my $collation = $tradition->collation;
137 my %witnesses;
e309421a 138
0068967c 139 # print STDERR "Setting graph globals\n";
e3196b2a 140 $tradition->name( $graph_data->{'name'} );
2626f709 141 my $use_version;
255875b8 142 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
143 my $val = $graph_data->{'global'}->{$gkey};
144 if( $gkey eq 'version' ) {
145 $use_version = $val;
146 } else {
147 $collation->$gkey( $val );
148 }
149 }
e309421a 150
32014ec9 151 # Add the nodes to the graph.
152
0068967c 153 # print STDERR "Adding graph nodes\n";
2626f709 154 foreach my $n ( @{$graph_data->{'nodes'}} ) {
0174d6a9 155 # If it is the start or end node, we already have one, so
156 # grab the rank and go.
0068967c 157 next if( defined $n->{$START_KEY} );
0174d6a9 158 if( defined $n->{$END_KEY} ) {
159 $collation->end->rank( $n->{$RANK_KEY} );
160 next;
161 }
255875b8 162
94c00c71 163 # First extract the data that we can use without reference to
164 # anything else.
94c00c71 165
255875b8 166 # Create the node.
167 my $reading_options = {
2626f709 168 'id' => $n->{$IDKEY},
169 'is_lacuna' => $n->{$LACUNA_KEY},
15db7774 170 'is_common' => $n->{$COMMON_KEY},
255875b8 171 };
2626f709 172 my $rank = $n->{$RANK_KEY};
255875b8 173 $reading_options->{'rank'} = $rank if $rank;
2626f709 174 my $text = $n->{$TOKENKEY};
255875b8 175 $reading_options->{'text'} = $text if $text;
176
255875b8 177 my $gnode = $collation->add_reading( $reading_options );
32014ec9 178 }
910a0a6d 179
32014ec9 180 # Now add the edges.
0068967c 181 # print STDERR "Adding graph edges\n";
32014ec9 182 foreach my $e ( @{$graph_data->{'edges'}} ) {
94c00c71 183 my $from = $e->{$SOURCE_KEY};
184 my $to = $e->{$TARGET_KEY};
32014ec9 185
2626f709 186 # We need the witness, and whether it is an 'extra' reading path.
187 my $wit = $e->{$WITNESS_KEY};
188 warn "No witness label on path edge!" unless $wit;
189 my $extra = $e->{$EXTRA_KEY};
190 my $label = $wit . ( $extra ? $collation->ac_label : '' );
191 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
192 # Add the witness if we don't have it already.
193 unless( $witnesses{$wit} ) {
194 $tradition->add_witness( sigil => $wit );
195 $witnesses{$wit} = 1;
255875b8 196 }
2626f709 197 $tradition->witness( $wit )->is_layered( 1 ) if $extra;
32014ec9 198 }
2626f709 199
200 ## Done with the main graph, now look at the relationships.
201 # Nodes are added via the call to add_reading above. We only need
202 # add the relationships themselves.
203 # TODO check that scoping does trt
204 foreach my $e ( @{$rel_data->{'edges'}} ) {
205 my $from = $e->{$SOURCE_KEY};
206 my $to = $e->{$TARGET_KEY};
207 my $relationship_opts = {
208 'type' => $e->{$RELATIONSHIP_KEY},
209 'scope' => $e->{$SCOPE_KEY},
210 };
fdfa59a7 211 $relationship_opts->{'annotation'} = $e->{$ANNOTATION_KEY}
212 if exists $e->{$ANNOTATION_KEY};
2626f709 213 $relationship_opts->{'non_correctable'} = $e->{$CORRECT_KEY}
214 if exists $e->{$CORRECT_KEY};
215 $relationship_opts->{'non_independent'} = $e->{$INDEP_KEY}
216 if exists $e->{$INDEP_KEY};
fdfa59a7 217 # TODO unless relationship is scoped and that scoped relationship exists...
218 my $rel_exists;
219 if( $relationship_opts->{'scope'} ne 'local' ) {
220 my $relobj = $collation->get_relationship( $from->{$IDKEY}, $to->{$IDKEY} );
221 if( $relobj && $relobj->{'scope'} eq $relationship_opts->{'scope'}
222 && $relobj->{'type'} eq $relationship_opts->{'type'} ) {
223 $rel_exists = 1;
224 }
225 }
2626f709 226 $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY},
fdfa59a7 227 $relationship_opts ) unless $rel_exists;
2626f709 228 }
861c3e27 229
230 # Save the text for each witness so that we can ensure consistency
231 # later on
232 $tradition->collation->text_from_paths();
233
32014ec9 234}
235
e867486f 2361;
237
238=head1 BUGS / TODO
239
240=over
241
242=item * Make this into a stream parser with GraphML
243
244=item * Simply field -> attribute correspondence for nodes and edges
245
246=item * Share key name constants with Collation.pm
247
32014ec9 248=back
249
250=head1 LICENSE
251
252This package is free software and is provided "as is" without express
253or implied warranty. You can redistribute it and/or modify it under
254the same terms as Perl itself.
255
256=head1 AUTHOR
257
e867486f 258Tara L Andrews E<lt>aurum@cpan.orgE<gt>