convert Catalyst app to use KiokuDB backend
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
e867486f 5use Text::Tradition::Parser::GraphML qw/ graphml_parse populate_witness_path /;
32014ec9 6
7=head1 NAME
8
9Text::Tradition::Parser::GraphML
10
e867486f 11=head1 SYNOPSIS
12
13 use Text::Tradition;
14
15 my $t_from_file = Text::Tradition->new(
16 'name' => 'my text',
17 'input' => 'Self',
18 'file' => '/path/to/tradition.xml'
19 );
20
21 my $t_from_string = Text::Tradition->new(
22 'name' => 'my text',
23 'input' => 'Self',
24 'string' => $tradition_xml,
25 );
26
32014ec9 27=head1 DESCRIPTION
28
29Parser module for Text::Tradition to read in its own GraphML output format.
e867486f 30GraphML is a relatively simple graph description language; a 'graph' element
31can have 'node' and 'edge' elements, and each of these can have simple 'data'
32elements for attributes to be saved.
32014ec9 33
e867486f 34The graph itself has attributes as in the Collation object:
35
36=over
37
38=item * linear
39
40=item * ac_label
41
42=item * baselabel
43
44=item * wit_list_separator
45
46=back
47
48The node objects have the following attributes:
32014ec9 49
50=over
51
e867486f 52=item * name
53
54=item * reading
55
56=item * identical
57
58=item * rank
59
60=item * class
61
62=back
63
64The edge objects have the following attributes:
65
66=over
67
68=item * class
69
70=item * witness (for 'path' class edges)
71
72=item * extra (for 'path' class edges)
73
74=item * relationship (for 'relationship' class edges)
75
76=item * equal_rank (for 'relationship' class edges)
32014ec9 77
e867486f 78=item * non_correctable (for 'relationship' class edges)
32014ec9 79
e867486f 80=item * non_independent (for 'relationship' class edges)
81
82=back
83
84=head1 METHODS
85
86=head2 B<parse>
87
88parse( $graph, $opts );
89
90Takes an initialized Text::Tradition object and a set of options; creates
91the appropriate nodes and edges on the graph. The options hash should
92include either a 'file' argument or a 'string' argument, depending on the
93source of the XML to be parsed.
94
95=begin testing
96
97use Text::Tradition;
98binmode STDOUT, ":utf8";
99binmode STDERR, ":utf8";
100eval { no warnings; binmode $DB::OUT, ":utf8"; };
101
102my $tradition = 't/data/florilegium_graphml.xml';
103my $t = Text::Tradition->new(
104 'name' => 'inline',
105 'input' => 'Self',
106 'file' => $tradition,
107 );
108
109is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
110if( $t ) {
111 is( scalar $t->collation->readings, 319, "Collation has all readings" );
112 is( scalar $t->collation->paths, 2854, "Collation has all paths" );
113 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
114}
115
116=end testing
32014ec9 117
118=cut
119
94c00c71 120my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY,
121 $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY )
122 = qw/ name reading identical rank class
123 source target witness extra relationship/;
32014ec9 124
125sub parse {
dfc37e38 126 my( $tradition, $opts ) = @_;
e867486f 127 my $graph_data = graphml_parse( $opts );
94c00c71 128
32014ec9 129 my $collation = $tradition->collation;
130 my %witnesses;
e309421a 131
132 # Set up the graph-global attributes. They will appear in the
133 # hash under their accessor names.
e309421a 134 print STDERR "Setting graph globals\n";
e3196b2a 135 $tradition->name( $graph_data->{'name'} );
e309421a 136 foreach my $gkey ( keys %{$graph_data->{'attr'}} ) {
137 my $val = $graph_data->{'attr'}->{$gkey};
138 $collation->$gkey( $val );
139 }
140
32014ec9 141 # Add the nodes to the graph.
142
143 my $extra_data = {}; # Keep track of data that needs to be processed
144 # after the nodes & edges are created.
f6066bac 145 print STDERR "Adding graph nodes\n";
32014ec9 146 foreach my $n ( @{$graph_data->{'nodes'}} ) {
94c00c71 147 # First extract the data that we can use without reference to
148 # anything else.
149 my %node_data = %$n; # Need $n itself untouched for edge processing
910a0a6d 150 my $nodeid = delete $node_data{$IDKEY};
151 my $reading = delete $node_data{$TOKENKEY};
94c00c71 152 my $class = delete $node_data{$CLASS_KEY} || '';
153 my $rank = delete $node_data{$RANK_KEY};
154
155 # Create the node. Current valid classes are common and meta.
0106ea2e 156 # Everything else is a normal reading.
94c00c71 157 my $gnode = $collation->add_reading( $nodeid );
158 $gnode->text( $reading );
159 $gnode->make_common if $class eq 'common';
160 $gnode->is_meta( 1 ) if $class eq 'meta';
0106ea2e 161 # This is a horrible hack.
162 $gnode->is_lacuna( $reading =~ /^\#LACUNA/ );
94c00c71 163 $gnode->rank( $rank ) if defined $rank;
164
165 # Now save the data that we need for post-processing,
910a0a6d 166 # if it exists.
167 if ( keys %node_data ) {
94c00c71 168 $extra_data->{$nodeid} = \%node_data
910a0a6d 169 }
32014ec9 170 }
910a0a6d 171
32014ec9 172 # Now add the edges.
f6066bac 173 print STDERR "Adding graph edges\n";
e867486f 174 my $has_ante_corr = {};
32014ec9 175 foreach my $e ( @{$graph_data->{'edges'}} ) {
94c00c71 176 my $from = $e->{$SOURCE_KEY};
177 my $to = $e->{$TARGET_KEY};
178 my $class = $e->{$CLASS_KEY};
179
180 # We may have more information depending on the class.
181 if( $class eq 'path' ) {
182 # We need the witness, and whether it is an 'extra' reading path.
183 my $wit = $e->{$WITNESS_KEY};
184 warn "No witness label on path edge!" unless $wit;
185 my $extra = $e->{$EXTRA_KEY};
186 my $label = $wit . ( $extra ? $collation->ac_label : '' );
187 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
188 # Add the witness if we don't have it already.
189 unless( $witnesses{$wit} ) {
190 $tradition->add_witness( sigil => $wit );
191 $witnesses{$wit} = 1;
192 }
e867486f 193 $has_ante_corr->{$wit} = 1 if $extra;
94c00c71 194 } elsif( $class eq 'relationship' ) {
c9bf3dbf 195 # We need the metadata about the relationship.
196 my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} };
197 $opts->{'equal_rank'} = $e->{'equal_rank'}
198 if exists $e->{'equal_rank'};
199 $opts->{'non_correctable'} = $e->{'non_correctable'}
200 if exists $e->{'non_correctable'};
201 $opts->{'non_independent'} = $e->{'non_independent'}
202 if exists $e->{'non_independent'};
203 warn "No relationship type for relationship edge!" unless $opts->{'type'};
decc2a20 204 my( $ok, @result ) = $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, $opts );
205 unless( $ok ) {
206 warn "Did not add relationship: @result";
207 }
94c00c71 208 }
32014ec9 209 }
210
211 ## Deal with node information (transposition, relationships, etc.) that
212 ## needs to be processed after all the nodes are created.
94c00c71 213 print STDERR "Adding second-pass node data\n";
32014ec9 214 foreach my $nkey ( keys %$extra_data ) {
910a0a6d 215 foreach my $edkey ( keys %{$extra_data->{$nkey}} ) {
216 my $this_reading = $collation->reading( $nkey );
217 if( $edkey eq $TRANSPOS_KEY ) {
218 my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} );
910a0a6d 219 $this_reading->set_identical( $other_reading );
910a0a6d 220 } else {
221 warn "Unfamiliar reading node data $edkey for $nkey";
222 }
223 }
32014ec9 224 }
e309421a 225
226 # Set the $witness->path arrays for each wit.
e867486f 227 populate_witness_path( $tradition, $has_ante_corr );
32014ec9 228}
229
e867486f 2301;
231
232=head1 BUGS / TODO
233
234=over
235
236=item * Make this into a stream parser with GraphML
237
238=item * Simply field -> attribute correspondence for nodes and edges
239
240=item * Share key name constants with Collation.pm
241
32014ec9 242=back
243
244=head1 LICENSE
245
246This package is free software and is provided "as is" without express
247or implied warranty. You can redistribute it and/or modify it under
248the same terms as Perl itself.
249
250=head1 AUTHOR
251
e867486f 252Tara L Andrews E<lt>aurum@cpan.orgE<gt>