Our own variation on GraphML parsing
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Self.pm
CommitLineData
32014ec9 1package Text::Tradition::Parser::Self;
2
3use strict;
4use warnings;
5use Text::Tradition::Parser::GraphML;
6
7=head1 NAME
8
9Text::Tradition::Parser::GraphML
10
11=head1 DESCRIPTION
12
13Parser module for Text::Tradition to read in its own GraphML output format.
14TODO document what this format is.
15
16=head1 METHODS
17
18=over
19
20=item B<parse>
21
22parse( $graph, $graphml_string );
23
24Takes an initialized Text::Tradition::Graph object and a string
25containing the GraphML; creates the appropriate nodes and edges on the
26graph.
27
28=cut
29
30my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $POSITION_KEY )
31 = qw/ name reading identical position /;
32
33sub parse {
34 my( $tradition, $graphml_str ) = @_;
35 my $graph_data = Text::Tradition::Parser::GraphML::parse( $graphml_str );
36
37 my $collation = $tradition->collation;
38 my %witnesses;
39
40 # Add the nodes to the graph.
41
42 my $extra_data = {}; # Keep track of data that needs to be processed
43 # after the nodes & edges are created.
44 foreach my $n ( @{$graph_data->{'nodes'}} ) {
45 # Could use a better way of registering these
46 my %node_data = %$n;
47 my $nodeid = delete $node_data{$IDKEY};
48 my $reading = delete $node_data{$TOKENKEY};
49 my $gnode = $collation->add_reading( $nodeid );
50 $gnode->text( $reading );
51
52 # Now save the rest of the data, i.e. not the ID or label,
53 # if it exists.
54 if ( keys %node_data ) {
55 $extra_data->{$nodeid} = \%node_data;
56 }
57 }
58
59 # Now add the edges.
60 foreach my $e ( @{$graph_data->{'edges'}} ) {
61 my %edge_data = %$e;
62 my $from = delete $edge_data{'source'};
63 my $to = delete $edge_data{'target'};
64
65 # Whatever is left tells us what kind of edge it is.
66 foreach my $wkey ( keys %edge_data ) {
67 if( $wkey =~ /^witness/ ) {
68 my $wit = $edge_data{$wkey};
69 unless( $witnesses{$wit} ) {
70 $tradition->add_witness( sigil => $wit );
71 $witnesses{$wit} = 1;
72 }
73 my $label = $wkey eq 'witness_ante_corr'
74 ? $wit . $collation->ac_label : $wit;
75 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
76 } else {
77 my $rel = $edge_data{$wkey};
78 # TODO handle global relationships
79 $collation->add_relationship( $rel, $from->{$IDKEY}, $to->{$IDKEY} );
80 }
81 }
82 }
83
84 ## Deal with node information (transposition, relationships, etc.) that
85 ## needs to be processed after all the nodes are created.
86 foreach my $nkey ( keys %$extra_data ) {
87 foreach my $edkey ( keys %{$extra_data->{$nkey}} ) {
88 my $this_reading = $collation->reading( $nkey );
89 if( $edkey eq $TRANSPOS_KEY ) {
90 my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} );
91 if( $collation->linear ) {
92 $this_reading->set_identical( $other_reading );
93 } else {
94 $collation->merge_readings( $other_reading, $this_reading );
95 }
96 } elsif ( $edkey eq $POSITION_KEY ) {
97 $this_reading->position( $extra_data->{$nkey}->{$edkey} );
98 } else {
99 warn "Unfamiliar reading node data $edkey for $nkey";
100 }
101 }
102 }
103
104 # We know what the beginning and ending nodes are, no need to
105 # search or reset.
106 my $end_node = $collation->reading( '#END#' );
107 $DB::single = 1;
108 # Walk the paths and make reading sequences for our witnesses.
109 $collation->walk_witness_paths( $end_node );
110}
111
112=back
113
114=head1 LICENSE
115
116This package is free software and is provided "as is" without express
117or implied warranty. You can redistribute it and/or modify it under
118the same terms as Perl itself.
119
120=head1 AUTHOR
121
122Tara L Andrews, aurum@cpan.org
123
124=cut
125
1261;