Commit | Line | Data |
32014ec9 |
1 | package Text::Tradition::Parser::Self; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Text::Tradition::Parser::GraphML; |
6 | |
7 | =head1 NAME |
8 | |
9 | Text::Tradition::Parser::GraphML |
10 | |
11 | =head1 DESCRIPTION |
12 | |
13 | Parser module for Text::Tradition to read in its own GraphML output format. |
14 | TODO document what this format is. |
15 | |
16 | =head1 METHODS |
17 | |
18 | =over |
19 | |
20 | =item B<parse> |
21 | |
22 | parse( $graph, $graphml_string ); |
23 | |
24 | Takes an initialized Text::Tradition::Graph object and a string |
25 | containing the GraphML; creates the appropriate nodes and edges on the |
26 | graph. |
27 | |
28 | =cut |
29 | |
94c00c71 |
30 | # TODO share these with Collation.pm somehow |
31 | my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY, |
32 | $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY ) |
33 | = qw/ name reading identical rank class |
34 | source target witness extra relationship/; |
32014ec9 |
35 | |
36 | sub parse { |
37 | my( $tradition, $graphml_str ) = @_; |
94c00c71 |
38 | |
39 | # TODO this is begging for stream parsing instead of multiple loops. |
32014ec9 |
40 | my $graph_data = Text::Tradition::Parser::GraphML::parse( $graphml_str ); |
41 | |
42 | my $collation = $tradition->collation; |
43 | my %witnesses; |
e309421a |
44 | |
45 | # Set up the graph-global attributes. They will appear in the |
46 | # hash under their accessor names. |
47 | # TODO Consider simplifying this for nodes & edges as well. |
48 | print STDERR "Setting graph globals\n"; |
49 | foreach my $gkey ( keys %{$graph_data->{'attr'}} ) { |
50 | my $val = $graph_data->{'attr'}->{$gkey}; |
51 | $collation->$gkey( $val ); |
52 | } |
53 | |
32014ec9 |
54 | # Add the nodes to the graph. |
910a0a6d |
55 | # TODO Are we adding extra start/end nodes? |
32014ec9 |
56 | |
57 | my $extra_data = {}; # Keep track of data that needs to be processed |
58 | # after the nodes & edges are created. |
f6066bac |
59 | print STDERR "Adding graph nodes\n"; |
32014ec9 |
60 | foreach my $n ( @{$graph_data->{'nodes'}} ) { |
94c00c71 |
61 | # First extract the data that we can use without reference to |
62 | # anything else. |
63 | my %node_data = %$n; # Need $n itself untouched for edge processing |
910a0a6d |
64 | my $nodeid = delete $node_data{$IDKEY}; |
65 | my $reading = delete $node_data{$TOKENKEY}; |
94c00c71 |
66 | my $class = delete $node_data{$CLASS_KEY} || ''; |
67 | my $rank = delete $node_data{$RANK_KEY}; |
68 | |
69 | # Create the node. Current valid classes are common and meta. |
70 | # Everything else is a normal reading. |
71 | my $gnode = $collation->add_reading( $nodeid ); |
72 | $gnode->text( $reading ); |
73 | $gnode->make_common if $class eq 'common'; |
74 | $gnode->is_meta( 1 ) if $class eq 'meta'; |
75 | $gnode->rank( $rank ) if defined $rank; |
76 | |
77 | # Now save the data that we need for post-processing, |
910a0a6d |
78 | # if it exists. |
79 | if ( keys %node_data ) { |
94c00c71 |
80 | $extra_data->{$nodeid} = \%node_data |
910a0a6d |
81 | } |
32014ec9 |
82 | } |
910a0a6d |
83 | |
32014ec9 |
84 | # Now add the edges. |
f6066bac |
85 | print STDERR "Adding graph edges\n"; |
32014ec9 |
86 | foreach my $e ( @{$graph_data->{'edges'}} ) { |
94c00c71 |
87 | my $from = $e->{$SOURCE_KEY}; |
88 | my $to = $e->{$TARGET_KEY}; |
89 | my $class = $e->{$CLASS_KEY}; |
90 | |
91 | # We may have more information depending on the class. |
92 | if( $class eq 'path' ) { |
93 | # We need the witness, and whether it is an 'extra' reading path. |
94 | my $wit = $e->{$WITNESS_KEY}; |
95 | warn "No witness label on path edge!" unless $wit; |
96 | my $extra = $e->{$EXTRA_KEY}; |
97 | my $label = $wit . ( $extra ? $collation->ac_label : '' ); |
98 | $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label ); |
99 | # Add the witness if we don't have it already. |
100 | unless( $witnesses{$wit} ) { |
101 | $tradition->add_witness( sigil => $wit ); |
102 | $witnesses{$wit} = 1; |
103 | } |
e309421a |
104 | $witnesses{$wit} = 2 if $extra; |
94c00c71 |
105 | } elsif( $class eq 'relationship' ) { |
c9bf3dbf |
106 | # We need the metadata about the relationship. |
107 | my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} }; |
108 | $opts->{'equal_rank'} = $e->{'equal_rank'} |
109 | if exists $e->{'equal_rank'}; |
110 | $opts->{'non_correctable'} = $e->{'non_correctable'} |
111 | if exists $e->{'non_correctable'}; |
112 | $opts->{'non_independent'} = $e->{'non_independent'} |
113 | if exists $e->{'non_independent'}; |
114 | warn "No relationship type for relationship edge!" unless $opts->{'type'}; |
115 | $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY}, $opts ); |
94c00c71 |
116 | } |
32014ec9 |
117 | } |
118 | |
119 | ## Deal with node information (transposition, relationships, etc.) that |
120 | ## needs to be processed after all the nodes are created. |
94c00c71 |
121 | print STDERR "Adding second-pass node data\n"; |
32014ec9 |
122 | foreach my $nkey ( keys %$extra_data ) { |
910a0a6d |
123 | foreach my $edkey ( keys %{$extra_data->{$nkey}} ) { |
124 | my $this_reading = $collation->reading( $nkey ); |
125 | if( $edkey eq $TRANSPOS_KEY ) { |
126 | my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} ); |
910a0a6d |
127 | $this_reading->set_identical( $other_reading ); |
910a0a6d |
128 | } else { |
129 | warn "Unfamiliar reading node data $edkey for $nkey"; |
130 | } |
131 | } |
32014ec9 |
132 | } |
e309421a |
133 | |
134 | # Set the $witness->path arrays for each wit. |
135 | print STDERR "Walking paths for witnesses\n"; |
136 | foreach my $wit ( $tradition->witnesses ) { |
137 | my @path = $collation->reading_sequence( $collation->start, $collation->end, |
138 | $wit->sigil ); |
139 | $wit->path( \@path ); |
140 | if( $witnesses{$wit->sigil} == 2 ) { |
141 | # Get the uncorrected path too |
142 | my @uc = $collation->reading_sequence( $collation->start, $collation->end, |
143 | $wit->sigil . $collation->ac_label, $wit->sigil ); |
144 | $wit->uncorrected_path( \@uc ); |
145 | } |
146 | } |
32014ec9 |
147 | } |
148 | |
149 | =back |
150 | |
151 | =head1 LICENSE |
152 | |
153 | This package is free software and is provided "as is" without express |
154 | or implied warranty. You can redistribute it and/or modify it under |
155 | the same terms as Perl itself. |
156 | |
157 | =head1 AUTHOR |
158 | |
159 | Tara L Andrews, aurum@cpan.org |
160 | |
161 | =cut |
162 | |
163 | 1; |