Commit | Line | Data |
32014ec9 |
1 | package Text::Tradition::Parser::Self; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Text::Tradition::Parser::GraphML; |
6 | |
7 | =head1 NAME |
8 | |
9 | Text::Tradition::Parser::GraphML |
10 | |
11 | =head1 DESCRIPTION |
12 | |
13 | Parser module for Text::Tradition to read in its own GraphML output format. |
14 | TODO document what this format is. |
15 | |
16 | =head1 METHODS |
17 | |
18 | =over |
19 | |
20 | =item B<parse> |
21 | |
22 | parse( $graph, $graphml_string ); |
23 | |
24 | Takes an initialized Text::Tradition::Graph object and a string |
25 | containing the GraphML; creates the appropriate nodes and edges on the |
26 | graph. |
27 | |
28 | =cut |
29 | |
f6066bac |
30 | my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $POSITION_KEY, $CLASS_KEY ) |
31 | = qw/ name reading identical position class /; |
32014ec9 |
32 | |
33 | sub parse { |
34 | my( $tradition, $graphml_str ) = @_; |
f6066bac |
35 | $DB::single = 1; |
32014ec9 |
36 | my $graph_data = Text::Tradition::Parser::GraphML::parse( $graphml_str ); |
37 | |
38 | my $collation = $tradition->collation; |
39 | my %witnesses; |
40 | |
41 | # Add the nodes to the graph. |
42 | |
43 | my $extra_data = {}; # Keep track of data that needs to be processed |
44 | # after the nodes & edges are created. |
f6066bac |
45 | print STDERR "Adding graph nodes\n"; |
32014ec9 |
46 | foreach my $n ( @{$graph_data->{'nodes'}} ) { |
f6066bac |
47 | # Each node is either a segment or a reading, depending on |
48 | # its class. Readings have text, segments don't. |
32014ec9 |
49 | my %node_data = %$n; |
50 | my $nodeid = delete $node_data{$IDKEY}; |
51 | my $reading = delete $node_data{$TOKENKEY}; |
f6066bac |
52 | my $class = $node_data{$CLASS_KEY} || ''; |
53 | # TODO this is a hack, fix it? |
54 | $class = 'reading' unless $class eq 'segment'; |
55 | my $method = $class eq 'segment' ? "add_$class" : "add_reading"; |
56 | my $gnode = $collation->$method( $nodeid ); |
57 | $gnode->label( $reading ); |
58 | $gnode->set_common if $class eq 'common'; |
32014ec9 |
59 | |
60 | # Now save the rest of the data, i.e. not the ID or label, |
61 | # if it exists. |
62 | if ( keys %node_data ) { |
63 | $extra_data->{$nodeid} = \%node_data; |
64 | } |
65 | } |
66 | |
67 | # Now add the edges. |
f6066bac |
68 | print STDERR "Adding graph edges\n"; |
32014ec9 |
69 | foreach my $e ( @{$graph_data->{'edges'}} ) { |
70 | my %edge_data = %$e; |
71 | my $from = delete $edge_data{'source'}; |
72 | my $to = delete $edge_data{'target'}; |
f6066bac |
73 | my $class = delete $edge_data{'class'}; |
32014ec9 |
74 | |
75 | # Whatever is left tells us what kind of edge it is. |
76 | foreach my $wkey ( keys %edge_data ) { |
77 | if( $wkey =~ /^witness/ ) { |
f6066bac |
78 | unless( $class eq 'path' ) { |
79 | warn "Cannot add witness label to a $class edge"; |
80 | next; |
81 | } |
32014ec9 |
82 | my $wit = $edge_data{$wkey}; |
83 | unless( $witnesses{$wit} ) { |
84 | $tradition->add_witness( sigil => $wit ); |
85 | $witnesses{$wit} = 1; |
86 | } |
87 | my $label = $wkey eq 'witness_ante_corr' |
88 | ? $wit . $collation->ac_label : $wit; |
89 | $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label ); |
f6066bac |
90 | } elsif( $wkey eq 'relationship' ) { |
91 | unless( $class eq 'relationship' ) { |
92 | warn "Cannot add relationship label to a $class edge"; |
93 | next; |
94 | } |
32014ec9 |
95 | my $rel = $edge_data{$wkey}; |
96 | # TODO handle global relationships |
97 | $collation->add_relationship( $rel, $from->{$IDKEY}, $to->{$IDKEY} ); |
f6066bac |
98 | } else { |
99 | my $seg_edge = $collation->graph->add_edge( $from->{$IDKEY}, $to->{$IDKEY} ); |
100 | $seg_edge->set_attribute( 'class', 'segment' ); |
32014ec9 |
101 | } |
102 | } |
103 | } |
104 | |
105 | ## Deal with node information (transposition, relationships, etc.) that |
106 | ## needs to be processed after all the nodes are created. |
f6066bac |
107 | print STDERR "Adding second-pass data\n"; |
108 | my $linear = undef; |
32014ec9 |
109 | foreach my $nkey ( keys %$extra_data ) { |
110 | foreach my $edkey ( keys %{$extra_data->{$nkey}} ) { |
111 | my $this_reading = $collation->reading( $nkey ); |
112 | if( $edkey eq $TRANSPOS_KEY ) { |
113 | my $other_reading = $collation->reading( $extra_data->{$nkey}->{$edkey} ); |
f6066bac |
114 | # We evidently have a linear graph. |
115 | $linear = 1; |
116 | $this_reading->set_identical( $other_reading ); |
32014ec9 |
117 | } elsif ( $edkey eq $POSITION_KEY ) { |
118 | $this_reading->position( $extra_data->{$nkey}->{$edkey} ); |
119 | } else { |
120 | warn "Unfamiliar reading node data $edkey for $nkey"; |
121 | } |
122 | } |
123 | } |
f6066bac |
124 | $collation->linear( $linear ); |
32014ec9 |
125 | |
126 | # We know what the beginning and ending nodes are, no need to |
127 | # search or reset. |
128 | my $end_node = $collation->reading( '#END#' ); |
32014ec9 |
129 | # Walk the paths and make reading sequences for our witnesses. |
f6066bac |
130 | # No need to calculate positions as we have them already. |
32014ec9 |
131 | $collation->walk_witness_paths( $end_node ); |
132 | } |
133 | |
134 | =back |
135 | |
136 | =head1 LICENSE |
137 | |
138 | This package is free software and is provided "as is" without express |
139 | or implied warranty. You can redistribute it and/or modify it under |
140 | the same terms as Perl itself. |
141 | |
142 | =head1 AUTHOR |
143 | |
144 | Tara L Andrews, aurum@cpan.org |
145 | |
146 | =cut |
147 | |
148 | 1; |