add conflict styling to analysis table
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
e867486f 5use Text::Tradition::Parser::GraphML qw/ graphml_parse populate_witness_path /;
32014ec9 6
7=head1 NAME
8
9Text::Tradition::Parser::GraphML
10
e867486f 11=head1 SYNOPSIS
12
13 use Text::Tradition;
14
15 my $t_from_file = Text::Tradition->new(
16 'name' => 'my text',
17 'input' => 'Self',
18 'file' => '/path/to/tradition.xml'
19 );
20
21 my $t_from_string = Text::Tradition->new(
22 'name' => 'my text',
23 'input' => 'Self',
24 'string' => $tradition_xml,
25 );
26
32014ec9 27=head1 DESCRIPTION
28
29Parser module for Text::Tradition to read in its own GraphML output format.
e867486f 30GraphML is a relatively simple graph description language; a 'graph' element
31can have 'node' and 'edge' elements, and each of these can have simple 'data'
32elements for attributes to be saved.
32014ec9 33
e867486f 34The graph itself has attributes as in the Collation object:
35
36=over
37
38=item * linear
39
40=item * ac_label
41
42=item * baselabel
43
44=item * wit_list_separator
45
46=back
47
48The node objects have the following attributes:
32014ec9 49
50=over
51
e867486f 52=item * name
53
54=item * reading
55
56=item * identical
57
58=item * rank
59
60=item * class
61
62=back
63
64The edge objects have the following attributes:
65
66=over
67
68=item * class
69
70=item * witness (for 'path' class edges)
71
72=item * extra (for 'path' class edges)
73
74=item * relationship (for 'relationship' class edges)
75
76=item * equal_rank (for 'relationship' class edges)
32014ec9 77
e867486f 78=item * non_correctable (for 'relationship' class edges)
32014ec9 79
e867486f 80=item * non_independent (for 'relationship' class edges)
81
82=back
83
84=head1 METHODS
85
86=head2 B<parse>
87
88parse( $graph, $opts );
89
90Takes an initialized Text::Tradition object and a set of options; creates
91the appropriate nodes and edges on the graph. The options hash should
92include either a 'file' argument or a 'string' argument, depending on the
93source of the XML to be parsed.
94
95=begin testing
96
97use Text::Tradition;
98binmode STDOUT, ":utf8";
99binmode STDERR, ":utf8";
100eval { no warnings; binmode $DB::OUT, ":utf8"; };
101
102my $tradition = 't/data/florilegium_graphml.xml';
103my $t = Text::Tradition->new(
104 'name' => 'inline',
105 'input' => 'Self',
106 'file' => $tradition,
107 );
108
109is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
110if( $t ) {
111 is( scalar $t->collation->readings, 319, "Collation has all readings" );
112 is( scalar $t->collation->paths, 2854, "Collation has all paths" );
113 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
114}
115
116=end testing
32014ec9 117
118=cut
119
94c00c71 120my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY,
121 $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY )
122 = qw/ name reading identical rank class
123 source target witness extra relationship/;
32014ec9 124
125sub parse {
dfc37e38 126 my( $tradition, $opts ) = @_;
e867486f 127 my $graph_data = graphml_parse( $opts );
94c00c71 128
32014ec9 129 my $collation = $tradition->collation;
130 my %witnesses;
e309421a 131
132 # Set up the graph-global attributes. They will appear in the
133 # hash under their accessor names.
e309421a 134 print STDERR "Setting graph globals\n";
135 foreach my $gkey ( keys %{$graph_data->{'attr'}} ) {
136 my $val = $graph_data->{'attr'}->{$gkey};
137 $collation->$gkey( $val );
138 }
139
32014ec9 140 # Add the nodes to the graph.
141
142 my $extra_data = {}; # Keep track of data that needs to be processed
143 # after the nodes & edges are created.
f6066bac 144 print STDERR "Adding graph nodes\n";
32014ec9 145 foreach my $n ( @{$graph_data->{'nodes'}} ) {
94c00c71 146 # First extract the data that we can use without reference to
147 # anything else.
148 my %node_data = %$n; # Need $n itself untouched for edge processing
910a0a6d 149 my $nodeid = delete $node_data{$IDKEY};
150 my $reading = delete $node_data{$TOKENKEY};
94c00c71 151 my $class = delete $node_data{$CLASS_KEY} || '';
152 my $rank = delete $node_data{$RANK_KEY};
153
154 # Create the node. Current valid classes are common and meta.
0106ea2e 155 # Everything else is a normal reading.
94c00c71 156 my $gnode = $collation->add_reading( $nodeid );
157 $gnode->text( $reading );
158 $gnode->make_common if $class eq 'common';
159 $gnode->is_meta( 1 ) if $class eq 'meta';
0106ea2e 160 # This is a horrible hack.
161 $gnode->is_lacuna( $reading =~ /^\#LACUNA/ );
94c00c71 162 $gnode->rank( $rank ) if defined $rank;
163
164 # Now save the data that we need for post-processing,
910a0a6d 165 # if it exists.
166 if ( keys %node_data ) {
94c00c71 167 $extra_data->{$nodeid} = \%node_data
910a0a6d 168 }
32014ec9 169 }
910a0a6d 170
32014ec9 171 # Now add the edges.
f6066bac 172 print STDERR "Adding graph edges\n";
e867486f 173 my $has_ante_corr = {};
32014ec9 174 foreach my $e ( @{$graph_data->{'edges'}} ) {
94c00c71 175 my $from = $e->{$SOURCE_KEY};
176 my $to = $e->{$TARGET_KEY};
177 my $class = $e->{$CLASS_KEY};
178
179 # We may have more information depending on the class.
180 if( $class eq 'path' ) {
181 # We need the witness, and whether it is an 'extra' reading path.
182 my $wit = $e->{$WITNESS_KEY};
183 warn "No witness label on path edge!" unless $wit;
184 my $extra = $e->{$EXTRA_KEY};
185 my $label = $wit . ( $extra ? $collation->ac_label : '' );
186 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
187 # Add the witness if we don't have it already.
188 unless( $witnesses{$wit} ) {
189 $tradition->add_witness( sigil => $wit );
190 $witnesses{$wit} = 1;
191 }
e867486f 192 $has_ante_corr->{$wit} = 1 if $extra;
94c00c71 193 } elsif( $class eq 'relationship' ) {
c9bf3dbf 194 # We need the metadata about the relationship.
195 my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} };
196 $opts->{'equal_rank'} = $e->{'equal_rank'}
197 if exists $e->{'equal_rank'};
198 $opts->{'non_correctable'} = $e->{'non_correctable'}
199 if exists $e->{'non_correctable'};
200 $opts->{'non_independent'} = $e->{'non_independent'}
201 if exists $e->{'non_independent'};
202 warn "No relationship type for relationship edge!" unless $opts->{'type'};
203 $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, $opts );
94c00c71 204 }
32014ec9 205 }
206
207 ## Deal with node information (transposition, relationships, etc.) that
208 ## needs to be processed after all the nodes are created.
94c00c71 209 print STDERR "Adding second-pass node data\n";
32014ec9 210 foreach my $nkey ( keys %$extra_data ) {
910a0a6d 211 foreach my $edkey ( keys %{$extra_data->{$nkey}} ) {
212 my $this_reading = $collation->reading( $nkey );
213 if( $edkey eq $TRANSPOS_KEY ) {
214 my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} );
910a0a6d 215 $this_reading->set_identical( $other_reading );
910a0a6d 216 } else {
217 warn "Unfamiliar reading node data $edkey for $nkey";
218 }
219 }
32014ec9 220 }
e309421a 221
222 # Set the $witness->path arrays for each wit.
e867486f 223 populate_witness_path( $tradition, $has_ante_corr );
32014ec9 224}
225
e867486f 2261;
227
228=head1 BUGS / TODO
229
230=over
231
232=item * Make this into a stream parser with GraphML
233
234=item * Simply field -> attribute correspondence for nodes and edges
235
236=item * Share key name constants with Collation.pm
237
32014ec9 238=back
239
240=head1 LICENSE
241
242This package is free software and is provided "as is" without express
243or implied warranty. You can redistribute it and/or modify it under
244the same terms as Perl itself.
245
246=head1 AUTHOR
247
e867486f 248Tara L Andrews E<lt>aurum@cpan.orgE<gt>