1 package Text::Tradition::Parser::Self;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
9 Text::Tradition::Parser::Self
15 my $t_from_file = Text::Tradition->new(
18 'file' => '/path/to/tradition.xml'
21 my $t_from_string = Text::Tradition->new(
24 'string' => $tradition_xml,
29 Parser module for Text::Tradition to read in its own GraphML output format.
30 GraphML is a relatively simple graph description language; a 'graph' element
31 can have 'node' and 'edge' elements, and each of these can have simple 'data'
32 elements for attributes to be saved.
34 The graph itself has attributes as in the Collation object:
44 =item * wit_list_separator
48 The node objects have the following attributes:
64 The edge objects have the following attributes:
70 =item * witness (for 'path' class edges)
72 =item * extra (for 'path' class edges)
74 =item * relationship (for 'relationship' class edges)
76 =item * equal_rank (for 'relationship' class edges)
78 =item * non_correctable (for 'relationship' class edges)
80 =item * non_independent (for 'relationship' class edges)
88 parse( $graph, $opts );
90 Takes an initialized Text::Tradition object and a set of options; creates
91 the appropriate nodes and edges on the graph. The options hash should
92 include either a 'file' argument or a 'string' argument, depending on the
93 source of the XML to be parsed.
98 binmode STDOUT, ":utf8";
99 binmode STDERR, ":utf8";
100 eval { no warnings; binmode $DB::OUT, ":utf8"; };
102 my $tradition = 't/data/florilegium_graphml.xml';
103 my $t = Text::Tradition->new(
106 'file' => $tradition,
109 is( ref( $t ), 'Text::Tradition', "Parsed our own GraphML" );
111 is( scalar $t->collation->readings, 319, "Collation has all readings" );
112 is( scalar $t->collation->paths, 376, "Collation has all paths" );
113 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
# Names of the data keys that are looked up on each parsed GraphML node
# and edge record (e.g. $n->{$IDKEY}, $e->{$SOURCE_KEY}).
# The assignment is positional: the Nth variable receives the Nth word of
# the qw// list, so the two lists must be kept in the same order and at
# the same length whenever a key is added or removed.
120 my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY,
121 $START_KEY, $END_KEY, $LACUNA_KEY,
122 $SOURCE_KEY, $TARGET_KEY, $WITNESS_KEY, $EXTRA_KEY, $RELATIONSHIP_KEY,
123 $SCOPE_KEY, $CORRECT_KEY, $INDEP_KEY )
124 = qw/ id text identical rank
125 is_start is_end is_lacuna
126 source target witness extra relationship
127 scope non_correctable non_independent /;
130 my( $tradition, $opts ) = @_;
132 # Collation data is in the first graph; relationship-specific stuff
134 my( $graph_data, $rel_data ) = graphml_parse( $opts );
136 my $collation = $tradition->collation;
139 # print STDERR "Setting graph globals\n";
140 $tradition->name( $graph_data->{'name'} );
142 foreach my $gkey ( keys %{$graph_data->{'global'}} ) {
143 my $val = $graph_data->{'global'}->{$gkey};
144 if( $gkey eq 'version' ) {
147 $collation->$gkey( $val );
151 # Add the nodes to the graph.
153 # print STDERR "Adding graph nodes\n";
154 foreach my $n ( @{$graph_data->{'nodes'}} ) {
155 # If it is the start or end node, we already have one, so
156 # grab the rank and go.
157 next if( defined $n->{$START_KEY} );
158 if( defined $n->{$END_KEY} ) {
159 $collation->end->rank( $n->{$RANK_KEY} );
163 # First extract the data that we can use without reference to
167 my $reading_options = {
168 'id' => $n->{$IDKEY},
169 'is_lacuna' => $n->{$LACUNA_KEY},
171 my $rank = $n->{$RANK_KEY};
172 $reading_options->{'rank'} = $rank if $rank;
173 my $text = $n->{$TOKENKEY};
174 $reading_options->{'text'} = $text if $text;
176 my $gnode = $collation->add_reading( $reading_options );
180 # print STDERR "Adding graph edges\n";
181 foreach my $e ( @{$graph_data->{'edges'}} ) {
182 my $from = $e->{$SOURCE_KEY};
183 my $to = $e->{$TARGET_KEY};
185 # We need the witness, and whether it is an 'extra' reading path.
186 my $wit = $e->{$WITNESS_KEY};
187 warn "No witness label on path edge!" unless $wit;
188 my $extra = $e->{$EXTRA_KEY};
189 my $label = $wit . ( $extra ? $collation->ac_label : '' );
190 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $label );
191 # Add the witness if we don't have it already.
192 unless( $witnesses{$wit} ) {
193 $tradition->add_witness( sigil => $wit );
194 $witnesses{$wit} = 1;
196 $tradition->witness( $wit )->is_layered( 1 ) if $extra;
199 ## Done with the main graph, now look at the relationships.
200 # Nodes are added via the call to add_reading above. We only need
201 # to add the relationships themselves.
202 # TODO: check that scoping does the right thing
203 foreach my $e ( @{$rel_data->{'edges'}} ) {
204 my $from = $e->{$SOURCE_KEY};
205 my $to = $e->{$TARGET_KEY};
206 my $relationship_opts = {
207 'type' => $e->{$RELATIONSHIP_KEY},
208 'scope' => $e->{$SCOPE_KEY},
210 $relationship_opts->{'non_correctable'} = $e->{$CORRECT_KEY}
211 if exists $e->{$CORRECT_KEY};
212 $relationship_opts->{'non_independent'} = $e->{$INDEP_KEY}
213 if exists $e->{$INDEP_KEY};
214 $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY},
215 $relationship_opts );
218 # Save the text for each witness so that we can ensure consistency
220 $tradition->collation->text_from_paths();
230 =item * Make this into a stream parser with GraphML
232 =item * Simplify field -> attribute correspondence for nodes and edges
234 =item * Share key name constants with Collation.pm
240 This package is free software and is provided "as is" without express
241 or implied warranty. You can redistribute it and/or modify it under
242 the same terms as Perl itself.
246 Tara L Andrews E<lt>aurum@cpan.orgE<gt>