CHECKPOINT for laptop migration
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
5use Text::Tradition::Parser::GraphML;
6
7=head1 NAME
8
9Text::Tradition::Parser::GraphML
10
11=head1 DESCRIPTION
12
13Parser module for Text::Tradition to read in its own GraphML output format.
14TODO document what this format is.
15
16=head1 METHODS
17
18=over
19
20=item B<parse>
21
22parse( $graph, $graphml_string );
23
24Takes an initialized Text::Tradition::Graph object and a string
25containing the GraphML; creates the appropriate nodes and edges on the
26graph.
27
28=cut
29
f6066bac 30my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $POSITION_KEY, $CLASS_KEY )
31 = qw/ name reading identical position class /;
32014ec9 32
33sub parse {
34 my( $tradition, $graphml_str ) = @_;
f6066bac 35 $DB::single = 1;
32014ec9 36 my $graph_data = Text::Tradition::Parser::GraphML::parse( $graphml_str );
37
38 my $collation = $tradition->collation;
39 my %witnesses;
40
41 # Add the nodes to the graph.
42
43 my $extra_data = {}; # Keep track of data that needs to be processed
44 # after the nodes & edges are created.
f6066bac 45 print STDERR "Adding graph nodes\n";
32014ec9 46 foreach my $n ( @{$graph_data->{'nodes'}} ) {
f6066bac 47 # Each node is either a segment or a reading, depending on
48 # its class. Readings have text, segments don't.
32014ec9 49 my %node_data = %$n;
50 my $nodeid = delete $node_data{$IDKEY};
51 my $reading = delete $node_data{$TOKENKEY};
f6066bac 52 my $class = $node_data{$CLASS_KEY} || '';
53 # TODO this is a hack, fix it?
54 $class = 'reading' unless $class eq 'segment';
55 my $method = $class eq 'segment' ? "add_$class" : "add_reading";
56 my $gnode = $collation->$method( $nodeid );
57 $gnode->label( $reading );
58 $gnode->set_common if $class eq 'common';
32014ec9 59
60 # Now save the rest of the data, i.e. not the ID or label,
61 # if it exists.
62 if ( keys %node_data ) {
63 $extra_data->{$nodeid} = \%node_data;
64 }
65 }
66
67 # Now add the edges.
f6066bac 68 print STDERR "Adding graph edges\n";
32014ec9 69 foreach my $e ( @{$graph_data->{'edges'}} ) {
70 my %edge_data = %$e;
71 my $from = delete $edge_data{'source'};
72 my $to = delete $edge_data{'target'};
f6066bac 73 my $class = delete $edge_data{'class'};
32014ec9 74
75 # Whatever is left tells us what kind of edge it is.
76 foreach my $wkey ( keys %edge_data ) {
77 if( $wkey =~ /^witness/ ) {
f6066bac 78 unless( $class eq 'path' ) {
79 warn "Cannot add witness label to a $class edge";
80 next;
81 }
32014ec9 82 my $wit = $edge_data{$wkey};
83 unless( $witnesses{$wit} ) {
84 $tradition->add_witness( sigil => $wit );
85 $witnesses{$wit} = 1;
86 }
87 my $label = $wkey eq 'witness_ante_corr'
88 ? $wit . $collation->ac_label : $wit;
89 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
f6066bac 90 } elsif( $wkey eq 'relationship' ) {
91 unless( $class eq 'relationship' ) {
92 warn "Cannot add relationship label to a $class edge";
93 next;
94 }
32014ec9 95 my $rel = $edge_data{$wkey};
96 # TODO handle global relationships
97 $collation->add_relationship( $rel, $from->{$IDKEY}, $to->{$IDKEY} );
f6066bac 98 } else {
99 my $seg_edge = $collation->graph->add_edge( $from->{$IDKEY}, $to->{$IDKEY} );
100 $seg_edge->set_attribute( 'class', 'segment' );
32014ec9 101 }
102 }
103 }
104
105 ## Deal with node information (transposition, relationships, etc.) that
106 ## needs to be processed after all the nodes are created.
f6066bac 107 print STDERR "Adding second-pass data\n";
108 my $linear = undef;
32014ec9 109 foreach my $nkey ( keys %$extra_data ) {
110 foreach my $edkey ( keys %{$extra_data->{$nkey}} ) {
111 my $this_reading = $collation->reading( $nkey );
112 if( $edkey eq $TRANSPOS_KEY ) {
113 my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} );
f6066bac 114 # We evidently have a linear graph.
115 $linear = 1;
116 $this_reading->set_identical( $other_reading );
32014ec9 117 } elsif ( $edkey eq $POSITION_KEY ) {
118 $this_reading->position( $extra_data->{$nkey}->{$edkey} );
119 } else {
120 warn "Unfamiliar reading node data $edkey for $nkey";
121 }
122 }
123 }
f6066bac 124 $collation->linear( $linear );
32014ec9 125
126 # We know what the beginning and ending nodes are, no need to
127 # search or reset.
128 my $end_node = $collation->reading( '#END#' );
32014ec9 129 # Walk the paths and make reading sequences for our witnesses.
f6066bac 130 # No need to calculate positions as we have them already.
32014ec9 131 $collation->walk_witness_paths( $end_node );
132}
133
134=back
135
136=head1 LICENSE
137
138This package is free software and is provided "as is" without express
139or implied warranty. You can redistribute it and/or modify it under
140the same terms as Perl itself.
141
142=head1 AUTHOR
143
144Tara L Andrews, aurum@cpan.org
145
146=cut
147
1481;