Commit | Line | Data |
32014ec9 |
1 | package Text::Tradition::Parser::Self; |
2 | |
3 | use strict; |
4 | use warnings; |
1f7aa795 |
5 | use Text::Tradition::Parser::GraphML qw/ graphml_parse /; |
32014ec9 |
6 | |
7 | =head1 NAME |
8 | |
9 | Text::Tradition::Parser::GraphML |
10 | |
e867486f |
11 | =head1 SYNOPSIS |
12 | |
13 | use Text::Tradition; |
14 | |
15 | my $t_from_file = Text::Tradition->new( |
16 | 'name' => 'my text', |
17 | 'input' => 'Self', |
18 | 'file' => '/path/to/tradition.xml' |
19 | ); |
20 | |
21 | my $t_from_string = Text::Tradition->new( |
22 | 'name' => 'my text', |
23 | 'input' => 'Self', |
24 | 'string' => $tradition_xml, |
25 | ); |
26 | |
32014ec9 |
27 | =head1 DESCRIPTION |
28 | |
29 | Parser module for Text::Tradition to read in its own GraphML output format. |
e867486f |
30 | GraphML is a relatively simple graph description language; a 'graph' element |
31 | can have 'node' and 'edge' elements, and each of these can have simple 'data' |
32 | elements for attributes to be saved. |
32014ec9 |
33 | |
e867486f |
34 | The graph itself has attributes as in the Collation object: |
35 | |
36 | =over |
37 | |
38 | =item * linear |
39 | |
40 | =item * ac_label |
41 | |
42 | =item * baselabel |
43 | |
44 | =item * wit_list_separator |
45 | |
46 | =back |
47 | |
48 | The node objects have the following attributes: |
32014ec9 |
49 | |
50 | =over |
51 | |
e867486f |
52 | =item * name |
53 | |
54 | =item * reading |
55 | |
56 | =item * identical |
57 | |
58 | =item * rank |
59 | |
60 | =item * class |
61 | |
62 | =back |
63 | |
64 | The edge objects have the following attributes: |
65 | |
66 | =over |
67 | |
68 | =item * class |
69 | |
70 | =item * witness (for 'path' class edges) |
71 | |
72 | =item * extra (for 'path' class edges) |
73 | |
74 | =item * relationship (for 'relationship' class edges) |
75 | |
76 | =item * equal_rank (for 'relationship' class edges) |
32014ec9 |
77 | |
e867486f |
78 | =item * non_correctable (for 'relationship' class edges) |
32014ec9 |
79 | |
e867486f |
80 | =item * non_independent (for 'relationship' class edges) |
81 | |
82 | =back |
83 | |
84 | =head1 METHODS |
85 | |
86 | =head2 B<parse> |
87 | |
88 | parse( $graph, $opts ); |
89 | |
90 | Takes an initialized Text::Tradition object and a set of options; creates |
91 | the appropriate nodes and edges on the graph. The options hash should |
92 | include either a 'file' argument or a 'string' argument, depending on the |
93 | source of the XML to be parsed. |
94 | |
95 | =begin testing |
96 | |
97 | use Text::Tradition; |
98 | binmode STDOUT, ":utf8"; |
99 | binmode STDERR, ":utf8"; |
100 | eval { no warnings; binmode $DB::OUT, ":utf8"; }; |
101 | |
102 | my $tradition = 't/data/florilegium_graphml.xml'; |
103 | my $t = Text::Tradition->new( |
104 | 'name' => 'inline', |
105 | 'input' => 'Self', |
106 | 'file' => $tradition, |
107 | ); |
108 | |
bbd064a9 |
109 | is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" ); |
e867486f |
110 | if( $t ) { |
111 | is( scalar $t->collation->readings, 319, "Collation has all readings" ); |
255875b8 |
112 | is( scalar $t->collation->paths, 376, "Collation has all paths" ); |
e867486f |
113 | is( scalar $t->witnesses, 13, "Collation has all witnesses" ); |
114 | } |
115 | |
bbd064a9 |
116 | # TODO add a relationship, write graphml, reparse it, check that the rel |
117 | # is still there |
118 | $t->language('Greek'); |
119 | $t->collation->add_relationship( 'w12', 'w13', |
120 | { 'type' => 'grammatical', 'scope' => 'global', |
121 | 'annotation' => 'This is some note' } ); |
122 | ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" ); |
123 | my $graphml_str = $t->collation->as_graphml; |
124 | |
125 | my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str ); |
126 | is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" ); |
127 | if( $newt ) { |
128 | is( scalar $newt->collation->readings, 319, "Collation has all readings" ); |
129 | is( scalar $newt->collation->paths, 376, "Collation has all paths" ); |
130 | is( scalar $newt->witnesses, 13, "Collation has all witnesses" ); |
131 | is( scalar $newt->collation->relationships, 1, "Collation has added relationship" ); |
132 | is( $newt->language, 'Greek', "Tradition has correct language setting" ); |
133 | my $rel = $newt->collation->get_relationship( 'w12', 'w13' ); |
134 | ok( $rel, "Found set relationship" ); |
135 | is( $rel->annotation, 'This is some note', "Relationship has its properties" ); |
136 | } |
137 | |
138 | |
e867486f |
139 | =end testing |
32014ec9 |
140 | |
141 | =cut |
142 | |
32014ec9 |
143 | sub parse { |
dfc37e38 |
144 | my( $tradition, $opts ) = @_; |
2626f709 |
145 | |
146 | # Collation data is in the first graph; relationship-specific stuff |
147 | # is in the second. |
148 | my( $graph_data, $rel_data ) = graphml_parse( $opts ); |
94c00c71 |
149 | |
32014ec9 |
150 | my $collation = $tradition->collation; |
151 | my %witnesses; |
e309421a |
152 | |
0068967c |
153 | # print STDERR "Setting graph globals\n"; |
e3196b2a |
154 | $tradition->name( $graph_data->{'name'} ); |
2626f709 |
155 | my $use_version; |
bbd064a9 |
156 | my $tmeta = $tradition->meta; |
157 | my $cmeta = $collation->meta; |
255875b8 |
158 | foreach my $gkey ( keys %{$graph_data->{'global'}} ) { |
159 | my $val = $graph_data->{'global'}->{$gkey}; |
160 | if( $gkey eq 'version' ) { |
161 | $use_version = $val; |
bbd064a9 |
162 | } elsif( $tmeta->has_attribute( $gkey ) ) { |
163 | $tradition->$gkey( $val ); |
255875b8 |
164 | } else { |
165 | $collation->$gkey( $val ); |
166 | } |
167 | } |
e309421a |
168 | |
32014ec9 |
169 | # Add the nodes to the graph. |
170 | |
bbd064a9 |
171 | # print STDERR "Adding collation readings\n"; |
2626f709 |
172 | foreach my $n ( @{$graph_data->{'nodes'}} ) { |
0174d6a9 |
173 | # If it is the start or end node, we already have one, so |
174 | # grab the rank and go. |
bbd064a9 |
175 | next if( defined $n->{'is_start'} ); |
176 | if( defined $n->{'is_end'} ) { |
177 | $collation->end->rank( $n->{'rank'} ); |
0174d6a9 |
178 | next; |
179 | } |
bbd064a9 |
180 | my $gnode = $collation->add_reading( $n ); |
32014ec9 |
181 | } |
910a0a6d |
182 | |
32014ec9 |
183 | # Now add the edges. |
bbd064a9 |
184 | # print STDERR "Adding collation path edges\n"; |
32014ec9 |
185 | foreach my $e ( @{$graph_data->{'edges'}} ) { |
bbd064a9 |
186 | my $from = $collation->reading( $e->{'source'}->{'id'} ); |
187 | my $to = $collation->reading( $e->{'target'}->{'id'} ); |
188 | |
189 | warn "No witness label on path edge!" unless $e->{'witness'}; |
190 | my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' ); |
191 | $collation->add_path( $from, $to, $label ); |
192 | |
2626f709 |
193 | # Add the witness if we don't have it already. |
bbd064a9 |
194 | unless( $witnesses{$e->{'witness'}} ) { |
195 | $tradition->add_witness( sigil => $e->{'witness'} ); |
196 | $witnesses{$e->{'witness'}} = 1; |
255875b8 |
197 | } |
bbd064a9 |
198 | $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'}; |
32014ec9 |
199 | } |
2626f709 |
200 | |
201 | ## Done with the main graph, now look at the relationships. |
202 | # Nodes are added via the call to add_reading above. We only need |
203 | # add the relationships themselves. |
204 | # TODO check that scoping does trt |
bf6e338d |
205 | $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels |
206 | foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) { |
bbd064a9 |
207 | my $from = $collation->reading( $e->{'source'}->{'id'} ); |
208 | my $to = $collation->reading( $e->{'target'}->{'id'} ); |
209 | delete $e->{'source'}; |
210 | delete $e->{'target'}; |
211 | # The remaining keys are relationship attributes. |
212 | # Backward compatibility... |
213 | if( $use_version eq '2.0' || $use_version eq '3.0' ) { |
214 | delete $e->{'class'}; |
215 | $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'}; |
216 | } |
217 | # Add the specified relationship unless we already have done. |
fdfa59a7 |
218 | my $rel_exists; |
bbd064a9 |
219 | if( $e->{'scope'} ne 'local' ) { |
220 | my $relobj = $collation->get_relationship( $from, $to ); |
221 | if( $relobj && $relobj->scope eq $e->{'scope'} |
222 | && $relobj->type eq $e->{'type'} ) { |
fdfa59a7 |
223 | $rel_exists = 1; |
224 | } |
225 | } |
bbd064a9 |
226 | $collation->add_relationship( $from, $to, $e ) unless $rel_exists; |
2626f709 |
227 | } |
861c3e27 |
228 | |
229 | # Save the text for each witness so that we can ensure consistency |
230 | # later on |
bbd064a9 |
231 | $collation->text_from_paths(); |
32014ec9 |
232 | } |
233 | |
bf6e338d |
234 | ## Return the relationship that comes first in priority. |
235 | my %LAYERS = ( |
236 | 'collated' => 1, |
237 | 'orthographic' => 2, |
238 | 'spelling' => 3, |
239 | ); |
240 | |
241 | sub _layersort_rel { |
242 | my( $a, $b ) = @_; |
243 | my $key = exists $a->{'type'} ? 'type' : 'relationship'; |
244 | my $at = $LAYERS{$a->{$key}} || 99; |
245 | my $bt = $LAYERS{$b->{$key}} || 99; |
246 | return $at <=> $bt; |
247 | } |
248 | |
e867486f |
249 | 1; |
250 | |
251 | =head1 BUGS / TODO |
252 | |
253 | =over |
254 | |
255 | =item * Make this into a stream parser with GraphML |
256 | |
257 | =item * Simply field -> attribute correspondence for nodes and edges |
258 | |
259 | =item * Share key name constants with Collation.pm |
260 | |
32014ec9 |
261 | =back |
262 | |
263 | =head1 LICENSE |
264 | |
265 | This package is free software and is provided "as is" without express |
266 | or implied warranty. You can redistribute it and/or modify it under |
267 | the same terms as Perl itself. |
268 | |
269 | =head1 AUTHOR |
270 | |
e867486f |
271 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |