Commit | Line | Data |
32014ec9 |
1 | package Text::Tradition::Parser::Self; |
2 | |
3 | use strict; |
4 | use warnings; |
1f7aa795 |
5 | use Text::Tradition::Parser::GraphML qw/ graphml_parse /; |
9fef629b |
6 | use Text::Tradition::UserStore; |
00c5bf0b |
7 | use TryCatch; |
32014ec9 |
8 | |
9 | =head1 NAME |
10 | |
11 | Text::Tradition::Parser::GraphML |
12 | |
e867486f |
13 | =head1 SYNOPSIS |
14 | |
15 | use Text::Tradition; |
16 | |
17 | my $t_from_file = Text::Tradition->new( |
18 | 'name' => 'my text', |
19 | 'input' => 'Self', |
20 | 'file' => '/path/to/tradition.xml' |
21 | ); |
22 | |
23 | my $t_from_string = Text::Tradition->new( |
24 | 'name' => 'my text', |
25 | 'input' => 'Self', |
26 | 'string' => $tradition_xml, |
27 | ); |
28 | |
32014ec9 |
29 | =head1 DESCRIPTION |
30 | |
31 | Parser module for Text::Tradition to read in its own GraphML output format. |
e867486f |
32 | GraphML is a relatively simple graph description language; a 'graph' element |
33 | can have 'node' and 'edge' elements, and each of these can have simple 'data' |
34 | elements for attributes to be saved. |
32014ec9 |
35 | |
e867486f |
36 | The graph itself has attributes as in the Collation object: |
37 | |
38 | =over |
39 | |
40 | =item * linear |
41 | |
42 | =item * ac_label |
43 | |
44 | =item * baselabel |
45 | |
46 | =item * wit_list_separator |
47 | |
48 | =back |
49 | |
50 | The node objects have the following attributes: |
32014ec9 |
51 | |
52 | =over |
53 | |
e867486f |
54 | =item * name |
55 | |
56 | =item * reading |
57 | |
58 | =item * identical |
59 | |
60 | =item * rank |
61 | |
62 | =item * class |
63 | |
64 | =back |
65 | |
66 | The edge objects have the following attributes: |
67 | |
68 | =over |
69 | |
70 | =item * class |
71 | |
72 | =item * witness (for 'path' class edges) |
73 | |
74 | =item * extra (for 'path' class edges) |
75 | |
76 | =item * relationship (for 'relationship' class edges) |
77 | |
78 | =item * equal_rank (for 'relationship' class edges) |
32014ec9 |
79 | |
e867486f |
80 | =item * non_correctable (for 'relationship' class edges) |
32014ec9 |
81 | |
e867486f |
82 | =item * non_independent (for 'relationship' class edges) |
83 | |
84 | =back |
85 | |
86 | =head1 METHODS |
87 | |
88 | =head2 B<parse> |
89 | |
90 | parse( $graph, $opts ); |
91 | |
92 | Takes an initialized Text::Tradition object and a set of options; creates |
93 | the appropriate nodes and edges on the graph. The options hash should |
94 | include either a 'file' argument or a 'string' argument, depending on the |
95 | source of the XML to be parsed. |
96 | |
97 | =begin testing |
98 | |
9fef629b |
99 | use File::Temp; |
100 | use Test::Warn; |
e867486f |
101 | use Text::Tradition; |
102 | binmode STDOUT, ":utf8"; |
103 | binmode STDERR, ":utf8"; |
104 | eval { no warnings; binmode $DB::OUT, ":utf8"; }; |
105 | |
106 | my $tradition = 't/data/florilegium_graphml.xml'; |
107 | my $t = Text::Tradition->new( |
108 | 'name' => 'inline', |
109 | 'input' => 'Self', |
110 | 'file' => $tradition, |
111 | ); |
112 | |
bbd064a9 |
113 | is( ref( $t ), 'Text::Tradition', "Parsed GraphML version 2" ); |
e867486f |
114 | if( $t ) { |
115 | is( scalar $t->collation->readings, 319, "Collation has all readings" ); |
255875b8 |
116 | is( scalar $t->collation->paths, 376, "Collation has all paths" ); |
e867486f |
117 | is( scalar $t->witnesses, 13, "Collation has all witnesses" ); |
118 | } |
119 | |
2a812726 |
120 | # TODO add a relationship, add a stemma, write graphml, reparse it, check that |
121 | # the new data is there |
bbd064a9 |
122 | $t->language('Greek'); |
2a812726 |
123 | $t->add_stemma( 'dotfile' => 't/data/florilegium.dot' ); |
bbd064a9 |
124 | $t->collation->add_relationship( 'w12', 'w13', |
125 | { 'type' => 'grammatical', 'scope' => 'global', |
126 | 'annotation' => 'This is some note' } ); |
127 | ok( $t->collation->get_relationship( 'w12', 'w13' ), "Relationship set" ); |
128 | my $graphml_str = $t->collation->as_graphml; |
129 | |
130 | my $newt = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str ); |
131 | is( ref( $newt ), 'Text::Tradition', "Parsed current GraphML version" ); |
132 | if( $newt ) { |
133 | is( scalar $newt->collation->readings, 319, "Collation has all readings" ); |
134 | is( scalar $newt->collation->paths, 376, "Collation has all paths" ); |
135 | is( scalar $newt->witnesses, 13, "Collation has all witnesses" ); |
136 | is( scalar $newt->collation->relationships, 1, "Collation has added relationship" ); |
137 | is( $newt->language, 'Greek', "Tradition has correct language setting" ); |
138 | my $rel = $newt->collation->get_relationship( 'w12', 'w13' ); |
139 | ok( $rel, "Found set relationship" ); |
140 | is( $rel->annotation, 'This is some note', "Relationship has its properties" ); |
2a812726 |
141 | is( scalar $newt->stemmata, 1, "Tradition has its stemma" ); |
142 | is( $newt->stemma(0)->witnesses, $t->stemma(0)->witnesses, "Stemma has correct length witness list" ); |
bbd064a9 |
143 | } |
144 | |
9fef629b |
145 | # Test user save / restore |
146 | my $fh = File::Temp->new(); |
147 | my $file = $fh->filename; |
148 | $fh->close; |
149 | my $dsn = "dbi:SQLite:dbname=$file"; |
150 | my $userstore = Text::Tradition::UserStore->new( { dsn => $dsn, |
151 | extra_args => { create => 1 } } ); |
152 | my $scope = $userstore->new_scope(); |
153 | my $testuser = $userstore->add_user( { url => 'http://example.com' } ); |
154 | is( ref( $testuser ), 'Text::Tradition::User', "Created test user via userstore" ); |
155 | $testuser->add_tradition( $newt ); |
156 | is( $newt->user->id, $testuser->id, "Assigned tradition to test user" ); |
157 | $graphml_str = $newt->collation->as_graphml; |
158 | my $usert; |
159 | warning_is { |
160 | $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str ); |
161 | } 'DROPPING user assignment without a specified userstore', |
162 | "Got expected user drop warning on parse"; |
163 | $usert = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml_str, |
164 | 'userstore' => { 'dsn' => $dsn } ); |
165 | is( $usert->user->id, $testuser->id, "Parsed tradition with userstore points to correct user" ); |
166 | |
bbd064a9 |
167 | |
e867486f |
168 | =end testing |
32014ec9 |
169 | |
170 | =cut |
144d845b |
171 | use Data::Dump; |
32014ec9 |
172 | sub parse { |
dfc37e38 |
173 | my( $tradition, $opts ) = @_; |
2626f709 |
174 | |
175 | # Collation data is in the first graph; relationship-specific stuff |
176 | # is in the second. |
177 | my( $graph_data, $rel_data ) = graphml_parse( $opts ); |
144d845b |
178 | |
32014ec9 |
179 | my $collation = $tradition->collation; |
180 | my %witnesses; |
e309421a |
181 | |
0068967c |
182 | # print STDERR "Setting graph globals\n"; |
e3196b2a |
183 | $tradition->name( $graph_data->{'name'} ); |
144d845b |
184 | |
2626f709 |
185 | my $use_version; |
bbd064a9 |
186 | my $tmeta = $tradition->meta; |
187 | my $cmeta = $collation->meta; |
255875b8 |
188 | foreach my $gkey ( keys %{$graph_data->{'global'}} ) { |
189 | my $val = $graph_data->{'global'}->{$gkey}; |
190 | if( $gkey eq 'version' ) { |
191 | $use_version = $val; |
9fef629b |
192 | } elsif( $gkey eq 'stemmata' ) { |
193 | # Parse the stemmata into objects |
2a812726 |
194 | foreach my $dotstr ( split( /\n/, $val ) ) { |
195 | $tradition->add_stemma( 'dot' => $dotstr ); |
196 | } |
9fef629b |
197 | } elsif( $gkey eq 'user' ) { |
198 | # Assign the tradition to the user if we can |
199 | if( exists $opts->{'userstore'} ) { |
200 | my $userdir; |
201 | try { |
202 | $userdir = Text::Tradition::UserStore->new( $opts->{'userstore'} ); |
203 | } catch { |
204 | warn( "Could not connect to specified user store; DROPPING user assignment" ); |
205 | } |
206 | my $user = $userdir->find_user( { username => $val } ); |
207 | if( $user ) { |
208 | $user->add_tradition( $tradition ); |
209 | } else { |
210 | warn( "Found no user with ID $val; DROPPING user assignment" ); |
211 | } |
212 | } else { |
213 | warn( "DROPPING user assignment without a specified userstore" ); |
214 | } |
bbd064a9 |
215 | } elsif( $tmeta->has_attribute( $gkey ) ) { |
216 | $tradition->$gkey( $val ); |
255875b8 |
217 | } else { |
218 | $collation->$gkey( $val ); |
219 | } |
220 | } |
e309421a |
221 | |
10e4b1ac |
222 | # Add the nodes to the graph. |
223 | # Note any reading IDs that were changed in order to comply with XML |
224 | # name restrictions; we have to hardcode start & end. |
225 | my %namechange = ( '#START#' => '__START__', '#END#' => '__END__' ); |
32014ec9 |
226 | |
bbd064a9 |
227 | # print STDERR "Adding collation readings\n"; |
2626f709 |
228 | foreach my $n ( @{$graph_data->{'nodes'}} ) { |
0174d6a9 |
229 | # If it is the start or end node, we already have one, so |
230 | # grab the rank and go. |
144d845b |
231 | if( defined $n->{'is_start'} ) { |
f3f26624 |
232 | # warn Data::Dump::dump($n); |
233 | # warn $collation->start->id; |
144d845b |
234 | $collation->start->rank($n->{'rank'}); |
235 | next; |
236 | } |
bbd064a9 |
237 | if( defined $n->{'is_end'} ) { |
f3f26624 |
238 | # warn Data::Dump::dump($n); |
bbd064a9 |
239 | $collation->end->rank( $n->{'rank'} ); |
0174d6a9 |
240 | next; |
241 | } |
bbd064a9 |
242 | my $gnode = $collation->add_reading( $n ); |
10e4b1ac |
243 | if( $gnode->id ne $n->{'id'} ) { |
244 | $namechange{$n->{'id'}} = $gnode->id; |
245 | } |
32014ec9 |
246 | } |
910a0a6d |
247 | |
32014ec9 |
248 | # Now add the edges. |
bbd064a9 |
249 | # print STDERR "Adding collation path edges\n"; |
32014ec9 |
250 | foreach my $e ( @{$graph_data->{'edges'}} ) { |
10e4b1ac |
251 | my $sourceid = exists $namechange{$e->{'source'}->{'id'}} |
252 | ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'}; |
253 | my $targetid = exists $namechange{$e->{'target'}->{'id'}} |
254 | ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'}; |
255 | my $from = $collation->reading( $sourceid ); |
256 | my $to = $collation->reading( $targetid ); |
bbd064a9 |
257 | |
258 | warn "No witness label on path edge!" unless $e->{'witness'}; |
259 | my $label = $e->{'witness'} . ( $e->{'extra'} ? $collation->ac_label : '' ); |
260 | $collation->add_path( $from, $to, $label ); |
261 | |
2626f709 |
262 | # Add the witness if we don't have it already. |
bbd064a9 |
263 | unless( $witnesses{$e->{'witness'}} ) { |
82fa4d57 |
264 | $tradition->add_witness( |
265 | sigil => $e->{'witness'}, 'sourcetype' => 'collation' ); |
bbd064a9 |
266 | $witnesses{$e->{'witness'}} = 1; |
255875b8 |
267 | } |
bbd064a9 |
268 | $tradition->witness( $e->{'witness'} )->is_layered( 1 ) if $e->{'extra'}; |
32014ec9 |
269 | } |
2626f709 |
270 | |
271 | ## Done with the main graph, now look at the relationships. |
272 | # Nodes are added via the call to add_reading above. We only need |
273 | # add the relationships themselves. |
274 | # TODO check that scoping does trt |
bf6e338d |
275 | $rel_data->{'edges'} ||= []; # so that the next line doesn't break on no rels |
276 | foreach my $e ( sort { _layersort_rel( $a, $b ) } @{$rel_data->{'edges'}} ) { |
10e4b1ac |
277 | my $sourceid = exists $namechange{$e->{'source'}->{'id'}} |
278 | ? $namechange{$e->{'source'}->{'id'}} : $e->{'source'}->{'id'}; |
279 | my $targetid = exists $namechange{$e->{'target'}->{'id'}} |
280 | ? $namechange{$e->{'target'}->{'id'}} : $e->{'target'}->{'id'}; |
281 | my $from = $collation->reading( $sourceid ); |
282 | my $to = $collation->reading( $targetid ); |
bbd064a9 |
283 | delete $e->{'source'}; |
284 | delete $e->{'target'}; |
285 | # The remaining keys are relationship attributes. |
286 | # Backward compatibility... |
287 | if( $use_version eq '2.0' || $use_version eq '3.0' ) { |
288 | delete $e->{'class'}; |
289 | $e->{'type'} = delete $e->{'relationship'} if exists $e->{'relationship'}; |
290 | } |
291 | # Add the specified relationship unless we already have done. |
fdfa59a7 |
292 | my $rel_exists; |
bbd064a9 |
293 | if( $e->{'scope'} ne 'local' ) { |
294 | my $relobj = $collation->get_relationship( $from, $to ); |
295 | if( $relobj && $relobj->scope eq $e->{'scope'} |
296 | && $relobj->type eq $e->{'type'} ) { |
fdfa59a7 |
297 | $rel_exists = 1; |
298 | } |
299 | } |
00c5bf0b |
300 | try { |
301 | $collation->add_relationship( $from, $to, $e ) unless $rel_exists; |
302 | } catch( Text::Tradition::Error $e ) { |
303 | warn "DROPPING $from -> $to: " . $e->message; |
304 | } |
2626f709 |
305 | } |
861c3e27 |
306 | |
307 | # Save the text for each witness so that we can ensure consistency |
308 | # later on |
bbd064a9 |
309 | $collation->text_from_paths(); |
32014ec9 |
310 | } |
311 | |
bf6e338d |
312 | ## Return the relationship that comes first in priority. |
313 | my %LAYERS = ( |
314 | 'collated' => 1, |
315 | 'orthographic' => 2, |
316 | 'spelling' => 3, |
317 | ); |
318 | |
319 | sub _layersort_rel { |
320 | my( $a, $b ) = @_; |
321 | my $key = exists $a->{'type'} ? 'type' : 'relationship'; |
322 | my $at = $LAYERS{$a->{$key}} || 99; |
323 | my $bt = $LAYERS{$b->{$key}} || 99; |
324 | return $at <=> $bt; |
325 | } |
326 | |
e867486f |
327 | 1; |
328 | |
329 | =head1 BUGS / TODO |
330 | |
331 | =over |
332 | |
333 | =item * Make this into a stream parser with GraphML |
334 | |
335 | =item * Simply field -> attribute correspondence for nodes and edges |
336 | |
337 | =item * Share key name constants with Collation.pm |
338 | |
32014ec9 |
339 | =back |
340 | |
341 | =head1 LICENSE |
342 | |
343 | This package is free software and is provided "as is" without express |
344 | or implied warranty. You can redistribute it and/or modify it under |
345 | the same terms as Perl itself. |
346 | |
347 | =head1 AUTHOR |
348 | |
e867486f |
349 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |