Commit | Line | Data |
cda6a45b |
1 | package Text::Tradition::Parser::CollateX; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Text::Tradition::Parser::GraphML; |
6 | |
7 | =head1 NAME |
8 | |
9 | Text::Tradition::Parser::CollateX |
10 | |
11 | =head1 DESCRIPTION |
12 | |
13 | Parser module for Text::Tradition, given a GraphML file from the |
14 | CollateX program that describes a collation graph. For further |
15 | information on the GraphML format for text collation, see |
16 | http://gregor.middell.net/collatex/ |
17 | |
18 | =head1 METHODS |
19 | |
20 | =over |
21 | |
22 | =item B<parse> |
23 | |
24 | parse( $graph, $graphml_string ); |
25 | |
26 | Takes an initialized Text::Tradition::Graph object and a string |
27 | containing the GraphML; creates the appropriate nodes and edges on the |
28 | graph. |
29 | |
30 | =cut |
31 | |
32 | my $IDKEY = 'number'; |
33 | my $CONTENTKEY = 'token'; |
34 | my $TRANSKEY = 'identical'; |
35 | |
36 | sub parse { |
37 | my( $tradition, $graphml_str ) = @_; |
38 | my $graph_data = Text::Tradition::Parser::GraphML::parse( $graphml_str ); |
39 | my $collation = $tradition->collation; |
40 | my %witnesses; # Keep track of the witnesses we encounter as we |
41 | # run through the graph data. |
42 | |
43 | # Add the nodes to the graph. First delete the start node, because |
44 | # GraphML graphs will have their own start nodes. |
45 | $collation->del_reading( $collation->start() ); |
46 | |
47 | my $extra_data = {}; # Keep track of info to be processed after all |
48 | # nodes have been created |
49 | foreach my $n ( @{$graph_data->{'nodes'}} ) { |
50 | my %node_data = %$n; |
51 | my $nodeid = delete $node_data{$IDKEY}; |
52 | my $token = delete $node_data{$CONTENTKEY}; |
53 | unless( $nodeid && $token ) { |
54 | warn "Did not find an ID or token for graph node, can't add it"; |
55 | next; |
56 | } |
57 | my $gnode = $collation->add_reading( $nodeid ); |
58 | $gnode->text( $token ); |
59 | |
60 | # Whatever is left is extra info to be processed later. |
61 | if( keys %node_data ) { |
62 | $extra_data->{$nodeid} = \%node_data; |
63 | } |
64 | } |
65 | |
66 | # Now add the edges. |
67 | foreach my $e ( @{$graph_data->{'edges'}} ) { |
68 | my %edge_data = %$e; |
69 | my $from = delete $edge_data{'source'}; |
70 | my $to = delete $edge_data{'target'}; |
71 | |
72 | # In CollateX, we have a distinct witness data ID per witness, |
73 | # so that we can have multiple witnesses per edge. We want to |
74 | # translate this to one witness per edge in our own |
75 | # representation. |
76 | foreach my $ekey ( keys %edge_data ) { |
77 | my $wit = $edge_data{$ekey}; |
78 | # Create the witness object if it does not yet exist. |
79 | unless( $witnesses{$wit} ) { |
80 | $tradition->add_witness( 'sigil' => $wit ); |
81 | $witnesses{$wit} = 1; |
82 | } |
83 | $collation->add_path( $from->{$IDKEY}, $to->{$IDKEY}, $wit ); |
84 | } |
85 | } |
86 | |
87 | # Process the extra node data if it exists. |
88 | foreach my $nodeid ( keys %$extra_data ) { |
89 | my $ed = $extra_data->{$nodeid}; |
90 | if( exists $ed->{$TRANSKEY} ) { |
91 | |
92 | my $tn_reading = $collation->reading( $nodeid ); |
93 | my $main_reading = $collation->reading( $ed->{$TRANSKEY} ); |
94 | if( $collation->linear ) { |
95 | $tn_reading->set_identical( $main_reading ); |
96 | } else { |
97 | $collation->merge_readings( $main_reading, $tn_reading ); |
98 | } |
99 | } # else we don't have any other tags to process yet. |
100 | } |
101 | |
102 | # Find the beginning and end nodes of the graph. The beginning node |
103 | # has no incoming edges; the end node has no outgoing edges. |
104 | my( $begin_node, $end_node ); |
105 | foreach my $gnode ( $collation->readings() ) { |
106 | # print STDERR "Checking node " . $gnode->name . "\n"; |
107 | my @outgoing = $gnode->outgoing(); |
108 | my @incoming = $gnode->incoming(); |
109 | |
110 | unless( scalar @incoming ) { |
111 | warn "Already have a beginning node" if $begin_node; |
112 | $begin_node = $gnode; |
113 | $collation->start( $gnode ); |
114 | } |
115 | unless( scalar @outgoing ) { |
116 | warn "Already have an ending node" if $end_node; |
117 | $end_node = $gnode; |
118 | } |
119 | } |
120 | |
121 | # Record for each witness its sequence of readings, and determine |
122 | # the common nodes. |
123 | my @common_nodes = $collation->walk_witness_paths( $end_node ); |
124 | |
125 | # Now we have added the witnesses and their paths, so have also |
126 | # implicitly marked the common nodes. Now we can calculate their |
127 | # explicit positions. |
128 | $collation->calculate_positions( @common_nodes ); |
129 | } |
130 | |
131 | =back |
132 | |
133 | =head1 LICENSE |
134 | |
135 | This package is free software and is provided "as is" without express |
136 | or implied warranty. You can redistribute it and/or modify it under |
137 | the same terms as Perl itself. |
138 | |
139 | =head1 AUTHOR |
140 | |
141 | Tara L Andrews, aurum@cpan.org |
142 | |
143 | =cut |
144 | |
145 | 1; |