1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
9 Text::Tradition::Parser::GraphML
15 my $t_from_file = Text::Tradition->new(
18 'file' => '/path/to/tradition.xml'
21 my $t_from_string = Text::Tradition->new(
24 'string' => $tradition_xml,
29 Parser module for Text::Tradition to read in its own GraphML output format.
30 GraphML is a relatively simple graph description language; a 'graph' element
31 can have 'node' and 'edge' elements, and each of these can have simple 'data'
32 elements for attributes to be saved.
34 The graph itself has attributes as in the Collation object:
44 =item * wit_list_separator
48 The node objects have the following attributes:
64 The edge objects have the following attributes:
70 =item * witness (for 'path' class edges)
72 =item * extra (for 'path' class edges)
74 =item * relationship (for 'relationship' class edges)
76 =item * equal_rank (for 'relationship' class edges)
78 =item * non_correctable (for 'relationship' class edges)
80 =item * non_independent (for 'relationship' class edges)
88 parse( $tradition, $opts );
90 Takes an initialized Text::Tradition object and a set of options; creates
91 the appropriate nodes and edges on the graph. The options hash should
92 include either a 'file' argument or a 'string' argument, depending on the
93 source of the XML to be parsed.
98 binmode STDOUT, ":utf8";
99 binmode STDERR, ":utf8";
100 eval { no warnings; binmode $DB::OUT, ":utf8"; };
102 my $tradition = 't/data/florilegium_graphml.xml';
103 my $t = Text::Tradition->new(
106 'file' => $tradition,
109 is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
111 is( scalar $t->collation->readings, 319, "Collation has all readings" );
112 is( scalar $t->collation->paths, 376, "Collation has all paths" );
113 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
# Names of the data fields looked up on the parsed GraphML node and edge
# records.  One declaration per key keeps each variable next to the field
# name it stands for.

# Fields read from node records.
my $IDKEY        = 'id';
my $TOKENKEY     = 'text';
my $TRANSPOS_KEY = 'identical';
my $RANK_KEY     = 'rank';
my $START_KEY    = 'is_start';
my $END_KEY      = 'is_end';
my $LACUNA_KEY   = 'is_lacuna';
my $COMMON_KEY   = 'is_common';

# Fields read from edge records (both path and relationship edges).
my $SOURCE_KEY       = 'source';
my $TARGET_KEY       = 'target';
my $WITNESS_KEY      = 'witness';
my $EXTRA_KEY        = 'extra';
my $RELATIONSHIP_KEY = 'relationship';
my $SCOPE_KEY        = 'scope';
my $CORRECT_KEY      = 'non_correctable';
my $INDEP_KEY        = 'non_independent';
130 my( $tradition, $opts ) = @_;
132 # Collation data is in the first graph; relationship-specific stuff
134 my( $graph_data, $rel_data ) = graphml_parse( $opts );
136 my $collation = $tradition->collation;
139 # print STDERR "Setting graph globals\n";
140 $tradition->name( $graph_data->{'name'} );
142 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
143 my $val = $graph_data->{'global'}->{$gkey};
144 if( $gkey eq 'version' ) {
147 $collation->$gkey( $val );
151 # Add the nodes to the graph.
153 # print STDERR "Adding graph nodes\n";
154 foreach my $n ( @{$graph_data->{'nodes'}} ) {
155 # If it is the start or end node, we already have one, so
156 # grab the rank and go.
157 next if( defined $n->{$START_KEY} );
158 if( defined $n->{$END_KEY} ) {
159 $collation->end->rank( $n->{$RANK_KEY} );
163 # First extract the data that we can use without reference to
167 my $reading_options = {
168 'id' => $n->{$IDKEY},
169 'is_lacuna' => $n->{$LACUNA_KEY},
170 'is_common' => $n->{$COMMON_KEY},
172 my $rank = $n->{$RANK_KEY};
173 $reading_options->{'rank'} = $rank if $rank;
174 my $text = $n->{$TOKENKEY};
175 $reading_options->{'text'} = $text if $text;
177 my $gnode = $collation->add_reading( $reading_options );
181 # print STDERR "Adding graph edges\n";
182 foreach my $e ( @{$graph_data->{'edges'}} ) {
183 my $from = $e->{$SOURCE_KEY};
184 my $to = $e->{$TARGET_KEY};
186 # We need the witness, and whether it is an 'extra' reading path.
187 my $wit = $e->{$WITNESS_KEY};
188 warn "No witness label on path edge!" unless $wit;
189 my $extra = $e->{$EXTRA_KEY};
190 my $label = $wit . ( $extra ? $collation->ac_label : '' );
191 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
192 # Add the witness if we don't have it already.
193 unless( $witnesses{$wit} ) {
194 $tradition->add_witness( sigil => $wit );
195 $witnesses{$wit} = 1;
197 $tradition->witness( $wit )->is_layered( 1 ) if $extra;
200 ## Done with the main graph, now look at the relationships.
201 # Nodes are added via the call to add_reading above. We only need
202 # to add the relationships themselves.
203 # TODO check that scoping does the right thing
204 foreach my $e ( @{$rel_data->{'edges'}} ) {
205 my $from = $e->{$SOURCE_KEY};
206 my $to = $e->{$TARGET_KEY};
207 my $relationship_opts = {
208 'type' => $e->{$RELATIONSHIP_KEY},
209 'scope' => $e->{$SCOPE_KEY},
211 $relationship_opts->{'non_correctable'} = $e->{$CORRECT_KEY}
212 if exists $e->{$CORRECT_KEY};
213 $relationship_opts->{'non_independent'} = $e->{$INDEP_KEY}
214 if exists $e->{$INDEP_KEY};
215 $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY},
216 $relationship_opts );
219 # Save the text for each witness so that we can ensure consistency
221 $tradition->collation->text_from_paths();
231 =item * Make this into a stream parser with GraphML
233 =item * Simplify field -> attribute correspondence for nodes and edges
235 =item * Share key name constants with Collation.pm
241 This package is free software and is provided "as is" without express
242 or implied warranty. You can redistribute it and/or modify it under
243 the same terms as Perl itself.
247 Tara L Andrews E<lt>aurum@cpan.orgE<gt>