move around some doc/testing logic
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
5use Text::Tradition::Parser::GraphML;
6
7=head1 NAME
8
9Text::Tradition::Parser::GraphML
10
11=head1 DESCRIPTION
12
13Parser module for Text::Tradition to read in its own GraphML output format.
14TODO document what this format is.
15
16=head1 METHODS
17
18=over
19
20=item B<parse>
21
22parse( $graph, $graphml_string );
23
24Takes an initialized Text::Tradition::Graph object and a string
25containing the GraphML; creates the appropriate nodes and edges on the
26graph.
27
28=cut
29
94c00c71 30# TODO share these with Collation.pm somehow
31my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY,
32 $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY )
33 = qw/ name reading identical rank class
34 source target witness extra relationship/;
32014ec9 35
36sub parse {
dfc37e38 37 my( $tradition, $opts ) = @_;
38 my $graph_data = Text::Tradition::Parser::GraphML::parse( $opts );
94c00c71 39
40 # TODO this is begging for stream parsing instead of multiple loops.
32014ec9 41
42 my $collation = $tradition->collation;
43 my %witnesses;
e309421a 44
45 # Set up the graph-global attributes. They will appear in the
46 # hash under their accessor names.
47 # TODO Consider simplifying this for nodes & edges as well.
48 print STDERR "Setting graph globals\n";
49 foreach my $gkey ( keys %{$graph_data->{'attr'}} ) {
50 my $val = $graph_data->{'attr'}->{$gkey};
51 $collation->$gkey( $val );
52 }
53
32014ec9 54 # Add the nodes to the graph.
910a0a6d 55 # TODO Are we adding extra start/end nodes?
32014ec9 56
57 my $extra_data = {}; # Keep track of data that needs to be processed
58 # after the nodes & edges are created.
f6066bac 59 print STDERR "Adding graph nodes\n";
32014ec9 60 foreach my $n ( @{$graph_data->{'nodes'}} ) {
94c00c71 61 # First extract the data that we can use without reference to
62 # anything else.
63 my %node_data = %$n; # Need $n itself untouched for edge processing
910a0a6d 64 my $nodeid = delete $node_data{$IDKEY};
65 my $reading = delete $node_data{$TOKENKEY};
94c00c71 66 my $class = delete $node_data{$CLASS_KEY} || '';
67 my $rank = delete $node_data{$RANK_KEY};
68
69 # Create the node. Current valid classes are common and meta.
0106ea2e 70 # Everything else is a normal reading.
71 ## TODO RATIONALIZE THESE CLASSES
94c00c71 72 my $gnode = $collation->add_reading( $nodeid );
73 $gnode->text( $reading );
74 $gnode->make_common if $class eq 'common';
75 $gnode->is_meta( 1 ) if $class eq 'meta';
0106ea2e 76 # This is a horrible hack.
77 $gnode->is_lacuna( $reading =~ /^\#LACUNA/ );
94c00c71 78 $gnode->rank( $rank ) if defined $rank;
79
80 # Now save the data that we need for post-processing,
910a0a6d 81 # if it exists.
82 if ( keys %node_data ) {
94c00c71 83 $extra_data->{$nodeid} = \%node_data
910a0a6d 84 }
32014ec9 85 }
910a0a6d 86
32014ec9 87 # Now add the edges.
f6066bac 88 print STDERR "Adding graph edges\n";
32014ec9 89 foreach my $e ( @{$graph_data->{'edges'}} ) {
94c00c71 90 my $from = $e->{$SOURCE_KEY};
91 my $to = $e->{$TARGET_KEY};
92 my $class = $e->{$CLASS_KEY};
93
94 # We may have more information depending on the class.
95 if( $class eq 'path' ) {
96 # We need the witness, and whether it is an 'extra' reading path.
97 my $wit = $e->{$WITNESS_KEY};
98 warn "No witness label on path edge!" unless $wit;
99 my $extra = $e->{$EXTRA_KEY};
100 my $label = $wit . ( $extra ? $collation->ac_label : '' );
101 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
102 # Add the witness if we don't have it already.
103 unless( $witnesses{$wit} ) {
104 $tradition->add_witness( sigil => $wit );
105 $witnesses{$wit} = 1;
106 }
e309421a 107 $witnesses{$wit} = 2 if $extra;
94c00c71 108 } elsif( $class eq 'relationship' ) {
c9bf3dbf 109 # We need the metadata about the relationship.
110 my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} };
111 $opts->{'equal_rank'} = $e->{'equal_rank'}
112 if exists $e->{'equal_rank'};
113 $opts->{'non_correctable'} = $e->{'non_correctable'}
114 if exists $e->{'non_correctable'};
115 $opts->{'non_independent'} = $e->{'non_independent'}
116 if exists $e->{'non_independent'};
117 warn "No relationship type for relationship edge!" unless $opts->{'type'};
118 $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, $opts );
94c00c71 119 }
32014ec9 120 }
121
122 ## Deal with node information (transposition, relationships, etc.) that
123 ## needs to be processed after all the nodes are created.
94c00c71 124 print STDERR "Adding second-pass node data\n";
32014ec9 125 foreach my $nkey ( keys %$extra_data ) {
910a0a6d 126 foreach my $edkey ( keys %{$extra_data->{$nkey}} ) {
127 my $this_reading = $collation->reading( $nkey );
128 if( $edkey eq $TRANSPOS_KEY ) {
129 my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} );
910a0a6d 130 $this_reading->set_identical( $other_reading );
910a0a6d 131 } else {
132 warn "Unfamiliar reading node data $edkey for $nkey";
133 }
134 }
32014ec9 135 }
e309421a 136
137 # Set the $witness->path arrays for each wit.
138 print STDERR "Walking paths for witnesses\n";
139 foreach my $wit ( $tradition->witnesses ) {
140 my @path = $collation->reading_sequence( $collation->start, $collation->end,
141 $wit->sigil );
142 $wit->path( \@path );
143 if( $witnesses{$wit->sigil} == 2 ) {
144 # Get the uncorrected path too
145 my @uc = $collation->reading_sequence( $collation->start, $collation->end,
146 $wit->sigil . $collation->ac_label, $wit->sigil );
147 $wit->uncorrected_path( \@uc );
148 }
149 }
32014ec9 150}
151
152=back
153
154=head1 LICENSE
155
156This package is free software and is provided "as is" without express
157or implied warranty. You can redistribute it and/or modify it under
158the same terms as Perl itself.
159
160=head1 AUTHOR
161
162Tara L Andrews, aurum@cpan.org
163
164=cut
165
1661;