1 package Text::Tradition::Parser::CollateX;
5 use Text::Tradition::Parser::GraphML qw/ graphml_parse /;
9 Text::Tradition::Parser::CollateX
15 my $t_from_file = Text::Tradition->new(
17 'input' => 'CollateX',
18 'file' => '/path/to/collation.xml'
21 my $t_from_string = Text::Tradition->new(
23 'input' => 'CollateX',
24 'string' => $collation_xml,
29 Parser module for Text::Tradition, given a GraphML file from the
30 CollateX program that describes a collation graph. For further
31 information on the GraphML format for text collation, see
32 http://gregor.middell.net/collatex/
38 parse( $tradition, $init_options );
40 Takes an initialized Text::Tradition object and a set of options; creates
41 the appropriate nodes and edges on the graph. The options hash should
42 include either a 'file' argument or a 'string' argument, depending on the
43 source of the XML to be parsed.
48 binmode STDOUT, ":utf8";
49 binmode STDERR, ":utf8";
50 eval { no warnings; binmode $DB::OUT, ":utf8"; };
52 my $cxfile = 't/data/Collatex-16.xml';
53 my $t = Text::Tradition->new(
55 'input' => 'CollateX',
59 is( ref( $t ), 'Text::Tradition', "Parsed a CollateX input" );
61 is( scalar $t->collation->readings, 26, "Collation has all readings" );
62 is( scalar $t->collation->paths, 32, "Collation has all paths" );
63 is( scalar $t->witnesses, 3, "Collation has all witnesses" );
65 # Check an 'identical' node
66 my $transposed = $t->collation->reading( 'n15' );
67 my @related = $transposed->related_readings;
68 is( scalar @related, 1, "Reading links to transposed version" );
69 is( $related[0]->id, 'n18', "Correct transposition link" );
77 my $CONTENTKEY = 'tokens';
78 my $EDGETYPEKEY = 'type';
79 my $WITKEY = 'witnesses';
82 my( $tradition, $opts ) = @_;
83 my( $graph_data ) = graphml_parse( $opts );
84 my $collation = $tradition->collation;
86 # First add the readings to the graph.
87 ## Assume the start node has no text and id 0, and the end node has
88 ## no text and ID [number of nodes] - 1.
89 my $endnode = scalar @{$graph_data->{'nodes'}} - 1;
90 foreach my $n ( @{$graph_data->{'nodes'}} ) {
91 unless( defined $n->{$IDKEY} && defined $n->{$CONTENTKEY} ) {
92 if( defined $n->{$IDKEY} && $n->{$IDKEY} == 0 ) {
93 # It's the start node.
94 $n->{$IDKEY} = $collation->start->id;
95 } elsif ( defined $n->{$IDKEY} && $n->{$IDKEY} == $endnode ) {
97 $n->{$IDKEY} = $collation->end->id;
99 # Something is probably wrong.
100 warn "Did not find an ID or token for graph node, can't add it";
104 # Node ID should be an XML name, so prepend an 'n' if necessary.
105 if( $n->{$IDKEY} =~ /^\d/ ) {
106 $n->{$IDKEY} = 'n' . $n->{$IDKEY};
108 # Create the reading.
110 'id' => $n->{$IDKEY},
111 'text' => $n->{$CONTENTKEY},
113 my $gnode = $collation->add_reading( $gnode_args );
116 # Now add the path edges.
117 foreach my $e ( @{$graph_data->{'edges'}} ) {
118 my $from = $e->{'source'};
119 my $to = $e->{'target'};
121 ## Edge data keys are ID (which we don't need), witnesses, and type.
122 ## Type can be 'path' or 'relationship';
123 ## witnesses is a comma-separated list.
124 if( $e->{$EDGETYPEKEY} eq 'path' ) {
125 ## Add the path for each witness listesd.
126 # Create the witness objects if they does not yet exist.
127 foreach my $wit ( split( /, /, $e->{$WITKEY} ) ) {
128 unless( $tradition->witness( $wit ) ) {
129 $tradition->add_witness(
130 'sigil' => $wit, 'sourcetype' => 'collation' );
132 $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $wit );
134 } else { # type 'relationship'
135 if( $collation->linear ) {
136 $collation->add_relationship( $from->{$IDKEY}, $to->{$IDKEY},
137 { 'type' => 'transposition' } );
139 $collation->merge_readings( $from->{$IDKEY}, $to->{$IDKEY} );
145 $collation->calculate_common_readings()
146 if $collation->linear; # will implicitly rank
148 # Save the text for each witness so that we can ensure consistency
150 $tradition->collation->text_from_paths();
157 =item * Make this into a stream parser with GraphML
159 =item * Use CollateX-calculated ranks instead of recalculating our own
165 This package is free software and is provided "as is" without express
166 or implied warranty. You can redistribute it and/or modify it under
167 the same terms as Perl itself.
171 Tara L Andrews, aurum@cpan.org