1 package Text::Tradition::Collation;
4 use Encode qw( decode_utf8 );
8 use IPC::Run qw( run binary );
9 use JSON qw/ to_json /;
11 use Text::Tradition::Collation::Data;
12 use Text::Tradition::Collation::Reading;
13 use Text::Tradition::Collation::RelationshipStore;
14 use Text::Tradition::Error;
15 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
17 use XML::LibXML::XPathContext;
21 isa => 'Text::Tradition::Collation::Data',
60 isa => 'Text::Tradition',
61 writer => '_set_tradition',
69 Text::Tradition::Collation - a software model for a text collation
74 my $t = Text::Tradition->new(
75 'name' => 'this is a text',
77 'file' => '/path/to/tei_parallel_seg_file.xml' );
79 my $c = $t->collation;
80 my @readings = $c->readings;
81 my @paths = $c->paths;
82 my @relationships = $c->relationships;
84 my $svg_variant_graph = $t->collation->as_svg();
88 Text::Tradition is a library for representation and analysis of collated
89 texts, particularly medieval ones. The Collation is the central feature of
90 a Tradition, where the text, its sequence of readings, and its relationships
91 between readings are actually kept.
97 The constructor. Takes a hash or hashref of the following arguments:
101 =item * tradition - The Text::Tradition object to which the collation
104 =item * linear - Whether the collation should be linear; that is, whether
105 transposed readings should be treated as two linked readings rather than one,
106 and therefore whether the collation graph is acyclic. Defaults to true.
108 =item * baselabel - The default label for the path taken by a base text
109 (if any). Defaults to 'base text'.
111 =item * wit_list_separator - The string to join a list of witnesses for
112 purposes of making labels in display graphs. Defaults to ', '.
114 =item * ac_label - The extra label to tack onto a witness sigil when
115 representing another layer of path for the given witness - that is, when
116 a text has more than one possible reading due to scribal corrections or
117 the like. Defaults to ' (a.c.)'.
119 =item * wordsep - The string used to separate words in the original text.
130 =head2 wit_list_separator
138 Simple accessors for collation attributes.
142 The meta-reading at the start of every witness path.
146 The meta-reading at the end of every witness path.
150 Returns all Reading objects in the graph.
152 =head2 reading( $id )
154 Returns the Reading object corresponding to the given ID.
156 =head2 add_reading( $reading_args )
158 Adds a new reading object to the collation.
159 See L<Text::Tradition::Collation::Reading> for the available arguments.
161 =head2 del_reading( $object_or_id )
163 Removes the given reading from the collation, implicitly removing its
164 paths and relationships.
166 =head2 has_reading( $id )
168 Predicate to see whether a given reading ID is in the graph.
170 =head2 reading_witnesses( $object_or_id )
172 Returns a list of sigils whose witnesses contain the reading.
176 Returns all reading paths within the document - that is, all edges in the
177 collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
179 =head2 add_path( $source, $target, $sigil )
181 Links the given readings in the collation in sequence, under the given witness
182 sigil. The readings may be specified by object or ID.
184 =head2 del_path( $source, $target, $sigil )
186 Links the given readings in the collation in sequence, under the given witness
187 sigil. The readings may be specified by object or ID.
189 =head2 has_path( $source, $target );
191 Returns true if the two readings are linked in sequence in any witness.
192 The readings may be specified by object or ID.
196 Returns all Relationship objects in the collation.
198 =head2 add_relationship( $reading, $other_reading, $options )
200 Adds a new relationship of the type given in $options between the two readings,
201 which may be specified by object or ID. Returns a value of ( $status, @vectors)
202 where $status is true on success, and @vectors is a list of relationship edges
203 that were ultimately added.
204 See L<Text::Tradition::Collation::Relationship> for the available options.
209 my ( $class, @args ) = @_;
210 my %args = @args == 1 ? %{ $args[0] } : @args;
211 # TODO determine these from the Moose::Meta object
212 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
213 linear wordsep start end cached_table _graphcalc_done);
215 for my $attr (@delegate_attrs) {
216 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
218 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
224 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
225 $self->_set_start( $self->add_reading(
226 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
227 $self->_set_end( $self->add_reading(
228 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
231 =head2 register_relationship_type( %relationship_definition )
233 Add a relationship type definition to this collation. The argument can be either a
234 hash or a hashref, defining the properties of the relationship. For relationship types
235 and their properties, see L<Text::Tradition::Collation::RelationshipType>.
237 =head2 get_relationship_type( $relationship_name )
239 Retrieve the RelationshipType object for the relationship with the given name.
243 sub register_relationship_type {
245 my %args = @_ == 1 ? %{$_[0]} : @_;
246 if( $self->relations->has_type( $args{name} ) ) {
247 throw( 'Relationship type ' . $args{name} . ' already registered' );
249 $self->relations->add_type( %args );
252 sub get_relationship_type {
253 my( $self, $name ) = @_;
254 return $self->relations->has_type( $name )
255 ? $self->relations->type( $name ) : undef;
258 ### Reading construct/destruct functions
261 my( $self, $reading ) = @_;
262 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
263 my %args = %$reading;
264 if( $args{'init'} ) {
265 # If we are initializing an empty collation, don't assume that we
266 # have set a tradition.
267 delete $args{'init'};
268 } elsif( $self->tradition->can('language') && $self->tradition->has_language
269 && !exists $args{'language'} ) {
270 $args{'language'} = $self->tradition->language;
272 $reading = Text::Tradition::Collation::Reading->new(
273 'collation' => $self,
276 # First check to see if a reading with this ID exists.
277 if( $self->reading( $reading->id ) ) {
278 throw( "Collation already has a reading with id " . $reading->id );
280 $self->_graphcalc_done(0);
281 $self->_add_reading( $reading->id => $reading );
282 # Once the reading has been added, put it in both graphs.
283 $self->sequence->add_vertex( $reading->id );
284 $self->relations->add_reading( $reading->id );
288 around del_reading => sub {
293 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
296 # Remove the reading from the graphs.
297 $self->_graphcalc_done(0);
298 $self->_clear_cache; # Explicitly clear caches to GC the reading
299 $self->sequence->delete_vertex( $arg );
300 $self->relations->delete_reading( $arg );
303 $self->$orig( $arg );
306 =head2 merge_readings( $main, $second, $concatenate, $with_str )
308 Merges the $second reading into the $main one. If $concatenate is true, then
309 the merged node will carry the text of both readings, concatenated with either
310 $with_str (if specified) or a sensible default (the empty string if the
311 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
313 The first two arguments may be either readings or reading IDs.
320 my $cxfile = 't/data/Collatex-16.xml';
321 my $t = Text::Tradition->new(
323 'input' => 'CollateX',
326 my $c = $t->collation;
328 my $rno = scalar $c->readings;
329 # Split n21 ('unto') for testing purposes
330 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
331 my $old_r = $c->reading( 'n21' );
332 $old_r->alter_text( 'to' );
333 $c->del_path( 'n20', 'n21', 'A' );
334 $c->add_path( 'n20', 'n21p0', 'A' );
335 $c->add_path( 'n21p0', 'n21', 'A' );
336 $c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
338 ok( $c->reading( 'n21p0' ), "New reading exists" );
339 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
341 # Combine n3 and n4 ( with his )
342 $c->merge_readings( 'n3', 'n4', 1 );
343 ok( !$c->reading('n4'), "Reading n4 is gone" );
344 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
346 # Collapse n9 and n10 ( rood / root )
347 $c->merge_readings( 'n9', 'n10' );
348 ok( !$c->reading('n10'), "Reading n10 is gone" );
349 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
351 # Try to combine n21 and n21p0. This should break.
352 my $remaining = $c->reading('n21');
353 $remaining ||= $c->reading('n22'); # one of these should still exist
355 $c->merge_readings( 'n21p0', $remaining, 1 );
356 ok( 0, "Bad reading merge changed the graph" );
357 } catch( Text::Tradition::Error $e ) {
358 like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
360 ok( 0, "Unexpected error on bad reading merge: $@" );
364 $c->calculate_ranks();
365 ok( 1, "Graph is still evidently whole" );
366 } catch( Text::Tradition::Error $e ) {
367 ok( 0, "Caught a rank exception: " . $e->message );
378 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
379 my $mergemeta = $kept_obj->is_meta;
380 throw( "Cannot merge meta and non-meta reading" )
381 unless ( $mergemeta && $del_obj->is_meta )
382 || ( !$mergemeta && !$del_obj->is_meta );
384 throw( "Cannot merge with start or end node" )
385 if( $kept_obj eq $self->start || $kept_obj eq $self->end
386 || $del_obj eq $self->start || $del_obj eq $self->end );
387 throw( "Cannot combine text of meta readings" ) if $combine;
389 # We can only merge readings in a linear graph if:
390 # - they are contiguous with only one edge between them, OR
391 # - they are at equivalent ranks in the graph.
392 if( $self->linear ) {
393 my @delpred = $del_obj->predecessors;
394 my @keptsuc = $kept_obj->successors;
395 unless ( @delpred == 1 && $delpred[0] eq $kept_obj
396 && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
397 my( $is_ok, $msg ) = $self->relations->relationship_valid(
398 $kept_obj, $del_obj, 'collated' );
400 throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
405 # We only need the IDs for adding paths to the graph, not the reading
406 # objects themselves.
407 my $kept = $kept_obj->id;
408 my $deleted = $del_obj->id;
409 $self->_graphcalc_done(0);
411 # The kept reading should inherit the paths and the relationships
412 # of the deleted reading.
413 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
414 my @vector = ( $kept );
415 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
416 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
417 next if $vector[0] eq $vector[1]; # Don't add a self loop
418 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
419 $self->sequence->add_edge( @vector );
420 my $fwits = $self->sequence->get_edge_attributes( @vector );
421 @wits{keys %$fwits} = values %$fwits;
422 $self->sequence->set_edge_attributes( @vector, \%wits );
424 $self->relations->merge_readings( $kept, $deleted, $combine );
426 # Do the deletion deed.
428 # Combine the text of the readings
429 my $joinstr = $combine_char;
430 unless( defined $joinstr ) {
431 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
432 $joinstr = $self->wordsep unless defined $joinstr;
434 $kept_obj->_combine( $del_obj, $joinstr );
436 $self->del_reading( $deleted );
439 =head2 merge_related( @relationship_types )
441 Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
443 WARNING: This operation cannot be undone.
455 $t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
456 } [qr/Cannot set relationship on a meta reading/],
457 "Got expected relationship drop warning on parse";
459 my $c = $t->collation;
462 map { $rdg_ids{$_} = 1 } $c->readings;
463 $c->merge_related( 'orthographic' );
464 is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
465 "Successfully collapsed orthographic variation" );
466 map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
467 foreach my $rid ( keys %rdg_ids ) {
468 my $exp = $rdg_ids{$rid};
469 is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
470 ( $exp ? "retained" : "removed" ) );
472 ok( $c->linear, "Graph is still linear" );
474 $c->calculate_ranks; # This should succeed
475 ok( 1, "Can still calculate ranks on the new graph" );
477 ok( 0, "Rank calculation on merged graph failed: $@" );
480 # Now add some transpositions
481 $c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
482 $c->merge_related( 'transposition' );
483 is( scalar( $c->readings ), keys( %rdg_ids ) - 10,
484 "Transposed relationship is merged away" );
485 ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
486 ok( !$c->linear, "Graph is no longer linear" );
488 $c->calculate_ranks; # This should fail
489 ok( 0, "Rank calculation happened on nonlinear graph?!" );
490 } catch ( Text::Tradition::Error $e ) {
491 is( $e->message, 'Cannot calculate ranks on a non-linear graph',
492 "Rank calculation on merged graph threw an error" );
499 # TODO: there should be a way to display merged without affecting the underlying data!
504 map { $reltypehash{$_} = 1 } @_;
506 # Set up the filter for finding related readings
508 exists $reltypehash{$_[0]->type};
511 # Go through all readings looking for related ones
512 foreach my $r ( $self->readings ) {
513 next unless $self->reading( "$r" ); # might have been deleted meanwhile
514 while( my @related = $self->related_readings( $r, $filter ) ) {
515 push( @related, $r );
517 scalar $b->witnesses <=> scalar $a->witnesses
519 my $keep = shift @related;
520 foreach my $delr ( @related ) {
522 unless( $self->get_relationship( $keep, $delr )->colocated );
523 $self->merge_readings( $keep, $delr );
529 =head2 compress_readings
531 Where possible in the graph, compresses plain sequences of readings into a
532 single reading. The sequences must consist of readings with no
533 relationships to other readings, with only a single witness path between
534 them and no other witness paths from either that would skip the other. The
535 readings must also not be marked as nonsense or bad grammar.
537 WARNING: This operation cannot be undone.
541 sub compress_readings {
543 # Sanity check: first save the original text of each witness.
545 foreach my $wit ( $self->tradition->witnesses ) {
546 $origtext{$wit->sigil} = $self->path_text( $wit->sigil );
547 if( $wit->is_layered ) {
548 my $acsig = $wit->sigil . $self->ac_label;
549 $origtext{$acsig} = $self->path_text( $acsig );
554 # Anywhere in the graph that there is a reading that joins only to a single
555 # successor, and neither of these have any relationships, just join the two
557 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
558 # Now look for readings that can be joined to their successors.
559 next unless $rdg->is_combinable;
561 while( $self->sequence->successors( $rdg ) == 1 ) {
562 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
563 throw( "Infinite loop" ) if $seen{$next->id};
564 $seen{$next->id} = 1;
565 last if $self->sequence->predecessors( $next ) > 1;
566 last unless $next->is_combinable;
567 say "Joining readings $rdg and $next";
568 $self->merge_readings( $rdg, $next, 1 );
572 # Finally, make sure we haven't screwed anything up.
573 foreach my $wit ( $self->tradition->witnesses ) {
574 my $pathtext = $self->path_text( $wit->sigil );
575 throw( "Text differs for witness " . $wit->sigil )
576 unless $pathtext eq $origtext{$wit->sigil};
577 if( $wit->is_layered ) {
578 my $acsig = $wit->sigil . $self->ac_label;
579 $pathtext = $self->path_text( $acsig );
580 throw( "Layered text differs for witness " . $wit->sigil )
581 unless $pathtext eq $origtext{$acsig};
585 $self->relations->rebuild_equivalence();
586 $self->calculate_ranks();
589 # Helper function for manipulating the graph.
590 sub _stringify_args {
591 my( $self, $first, $second, @args ) = @_;
593 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
594 $second = $second->id
595 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
596 return( $first, $second, @args );
599 # Helper function for manipulating the graph.
600 sub _objectify_args {
601 my( $self, $first, $second, $arg ) = @_;
602 $first = $self->reading( $first )
603 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
604 $second = $self->reading( $second )
605 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
606 return( $first, $second, $arg );
609 =head2 duplicate_reading( $reading, @witlist )
611 Split the given reading into two, so that the new reading is in the path for
612 the witnesses given in @witlist. If the result is that certain non-colocated
613 relationships (e.g. transpositions) are no longer valid, these will be removed.
614 Returns the newly-created reading.
618 use Test::More::UTF8;
622 my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
623 is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
624 ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
626 my $sc = $st->collation;
628 ok( $sc->reading('n131'), "Tradition has the affected reading" );
629 is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
630 is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
632 # Detach the erroneously collated reading
633 my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
634 ok( $newr, "New reading was created" );
635 ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
636 is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
637 is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
638 my $csucc = $sc->common_successor( 'n131', 'n131_0' );
639 is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
641 # Check that the bad transposition is gone
642 is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
643 is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
645 # The collation should not be fixed
646 my @pairs = $sc->identical_readings();
647 is( scalar @pairs, 0, "Not re-collated yet" );
649 ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
650 @pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
651 is( scalar @pairs, 3, "Found three more identical readings" );
652 is( $sc->end->rank, 11, "The ranks shifted appropriately" );
653 $sc->flatten_ranks();
654 is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
656 # Check that we can't "duplicate" a reading with no wits or with all wits
658 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
659 ok( 0, "Reading duplication without witnesses throws an error" );
660 } catch( Text::Tradition::Error $e ) {
661 like( $e->message, qr/Must specify one or more witnesses/,
662 "Reading duplication without witnesses throws the expected error" );
664 ok( 0, "Reading duplication without witnesses threw the wrong error" );
668 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
669 ok( 0, "Reading duplication with all witnesses throws an error" );
670 } catch( Text::Tradition::Error $e ) {
671 like( $e->message, qr/Cannot join all witnesses/,
672 "Reading duplication with all witnesses throws the expected error" );
674 ok( 0, "Reading duplication with all witnesses threw the wrong error" );
678 $sc->calculate_ranks();
679 ok( 1, "Graph is still evidently whole" );
680 } catch( Text::Tradition::Error $e ) {
681 ok( 0, "Caught a rank exception: " . $e->message );
688 sub duplicate_reading {
689 my( $self, $r, @wits ) = @_;
690 # Check that we are not doing anything unwise.
691 throw( "Must specify one or more witnesses for the duplicated reading" )
693 unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
694 $r = $self->reading( $r );
696 throw( "Cannot duplicate a meta-reading" )
698 throw( "Cannot join all witnesses to the new reading" )
699 if scalar( @wits ) == scalar( $r->witnesses );
701 # Get all the reading attributes and duplicate them.
702 my $rmeta = Text::Tradition::Collation::Reading->meta;
704 foreach my $attr( $rmeta->get_all_attributes ) {
705 next if $attr->name =~ /^_/;
706 my $acc = $attr->get_read_method;
707 if( !$acc && $attr->has_applied_traits ) {
708 my $tr = $attr->applied_traits;
709 if( $tr->[0] =~ /::(Array|Hash)$/ ) {
711 my %methods = reverse %{$attr->handles};
712 $acc = $methods{elements};
713 $args{$attr->name} = $which eq 'Array'
714 ? [ $r->$acc ] : { $r->$acc };
717 $args{$attr->name} = $r->$acc if $acc;
720 # By definition the new reading will no longer be common.
721 $args{is_common} = 0;
722 # The new reading also needs its own ID.
723 $args{id} = $self->_generate_dup_id( $r->id );
725 # Try to make the new reading.
726 my $newr = $self->add_reading( \%args );
727 # The old reading is also no longer common.
730 # For each of the witnesses, dissociate from the old reading and
731 # associate with the new.
732 foreach my $wit ( @wits ) {
733 my $prior = $self->prior_reading( $r, $wit );
734 my $next = $self->next_reading( $r, $wit );
735 $self->del_path( $prior, $r, $wit );
736 $self->add_path( $prior, $newr, $wit );
737 $self->del_path( $r, $next, $wit );
738 $self->add_path( $newr, $next, $wit );
741 # If the graph is ranked, we need to look for relationships that are now
742 # invalid (i.e. 'non-colocation' types that might now be colocated) and
743 # remove them. If not, we can skip it.
746 my @deleted_relations;
747 if( $self->end->has_rank ) {
748 # Find the point where we can stop checking
749 $succ = $self->common_successor( $r, $newr );
751 # Hash the existing ranks
752 foreach my $rdg ( $self->readings ) {
753 $rrk{$rdg->id} = $rdg->rank;
755 # Calculate the new ranks
756 $self->calculate_ranks();
758 # Check for invalid non-colocated relationships among changed-rank readings
759 # from where the ranks start changing up to $succ
760 my $lastrank = $succ->rank;
761 foreach my $rdg ( $self->readings ) {
762 next if $rdg->rank > $lastrank;
763 next if $rdg->rank == $rrk{$rdg->id};
764 my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
765 next unless @noncolo;
766 foreach my $nc ( @noncolo ) {
767 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
768 push( @deleted_relations, [ $rdg->id, $nc->id ] );
773 return ( $newr, @deleted_relations );
776 sub _generate_dup_id {
777 my( $self, $rid ) = @_;
782 if( $self->has_reading( $newid ) ) {
795 # We only need the IDs for adding paths to the graph, not the reading
796 # objects themselves.
797 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
799 $self->_graphcalc_done(0);
800 # Connect the readings
801 unless( $self->sequence->has_edge( $source, $target ) ) {
802 $self->sequence->add_edge( $source, $target );
803 $self->relations->add_equivalence_edge( $source, $target );
805 # Note the witness in question
806 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
812 if( ref( $_[0] ) eq 'ARRAY' ) {
819 # We only need the IDs for removing paths from the graph, not the reading
820 # objects themselves.
821 my( $source, $target, $wit ) = $self->_stringify_args( @args );
823 $self->_graphcalc_done(0);
824 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
825 $self->sequence->delete_edge_attribute( $source, $target, $wit );
827 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
828 $self->sequence->delete_edge( $source, $target );
829 $self->relations->delete_equivalence_edge( $source, $target );
834 # Extra graph-alike utility
837 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
838 return undef unless $self->sequence->has_edge( $source, $target );
839 return $self->sequence->has_edge_attribute( $source, $target, $wit );
842 =head2 clear_witness( @sigil_list )
844 Clear the given witnesses out of the collation entirely, removing references
845 to them in paths, and removing readings that belong only to them. Should only
846 be called via $tradition->del_witness.
851 my( $self, @sigils ) = @_;
853 $self->_graphcalc_done(0);
854 # Clear the witness(es) out of the paths
855 foreach my $e ( $self->paths ) {
856 foreach my $sig ( @sigils ) {
857 $self->del_path( $e, $sig );
861 # Clear out the newly unused readings
862 foreach my $r ( $self->readings ) {
863 unless( $self->reading_witnesses( $r ) ) {
864 $self->del_reading( $r );
869 sub add_relationship {
871 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
872 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
873 foreach my $v ( @vectors ) {
874 next unless $self->get_relationship( $v )->colocated;
875 if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
876 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
877 $self->_graphcalc_done(0);
885 around qw/ get_relationship del_relationship / => sub {
889 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
892 my @stringargs = $self->_stringify_args( @args );
893 $self->$orig( @stringargs );
896 =head2 reading_witnesses( $reading )
898 Return a list of sigils corresponding to the witnesses in which the reading appears.
902 sub reading_witnesses {
903 my( $self, $reading ) = @_;
904 # We need only check either the incoming or the outgoing edges; I have
905 # arbitrarily chosen "incoming". Thus, special-case the start node.
906 if( $reading eq $self->start ) {
907 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
910 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
911 my $wits = $self->sequence->get_edge_attributes( @$e );
912 @all_witnesses{ keys %$wits } = 1;
914 my $acstr = $self->ac_label;
915 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
916 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
918 return keys %all_witnesses;
921 =head1 OUTPUT METHODS
923 =head2 as_svg( \%options )
925 Returns an SVG string that represents the graph, via as_dot and graphviz.
926 See as_dot for a list of options. Must have GraphViz (dot) installed to run.
931 my( $self, $opts ) = @_;
932 throw( "Need GraphViz installed to output SVG" )
933 unless File::Which::which( 'dot' );
934 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
935 $self->calculate_ranks()
936 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
937 my @cmd = qw/dot -Tsvg/;
939 my $dotfile = File::Temp->new();
941 # $dotfile->unlink_on_destroy(0);
942 binmode $dotfile, ':utf8';
943 print $dotfile $self->as_dot( $opts );
944 push( @cmd, $dotfile->filename );
945 run( \@cmd, ">", binary(), \$svg );
946 $svg = decode_utf8( $svg );
951 =head2 as_dot( \%options )
953 Returns a string that is the collation graph expressed in dot
954 (i.e. GraphViz) format. Options include:
969 my( $self, $opts ) = @_;
970 my $startrank = $opts->{'from'} if $opts;
971 my $endrank = $opts->{'to'} if $opts;
972 my $color_common = $opts->{'color_common'} if $opts;
973 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
974 && $self->end->rank > 100;
975 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
977 # Check the arguments
979 return if $endrank && $startrank > $endrank;
980 return if $startrank > $self->end->rank;
982 if( defined $endrank ) {
983 return if $endrank < 0;
984 $endrank = undef if $endrank == $self->end->rank;
987 my $graph_name = $self->tradition->name;
988 $graph_name =~ s/[^\w\s]//g;
989 $graph_name = join( '_', split( /\s+/, $graph_name ) );
997 'fillcolor' => 'white',
1002 'arrowhead' => 'open',
1003 'color' => '#000000',
1004 'fontcolor' => '#000000',
1007 my $dot = sprintf( "digraph %s {\n", $graph_name );
1008 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
1009 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
1011 # Output substitute start/end readings if necessary
1013 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
1016 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
1018 if( $STRAIGHTENHACK ) {
1020 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
1021 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
1022 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
1024 my %used; # Keep track of the readings that actually appear in the graph
1025 # Sort the readings by rank if we have ranks; this speeds layout.
1026 my @all_readings = $self->end->has_rank
1027 ? sort { $a->rank <=> $b->rank } $self->readings
1029 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
1030 foreach my $reading ( @all_readings ) {
1031 # Only output readings within our rank range.
1032 next if $startrank && $reading->rank < $startrank;
1033 next if $endrank && $reading->rank > $endrank;
1034 $used{$reading->id} = 1;
1035 # Need not output nodes without separate labels
1036 next if $reading->id eq $reading->text;
1038 my $label = $reading->text;
1039 unless( $label =~ /^[[:punct:]]+$/ ) {
1040 $label .= '-' if $reading->join_next;
1041 $label = "-$label" if $reading->join_prior;
1043 $label =~ s/\"/\\\"/g;
1044 $rattrs->{'label'} = $label;
1045 $rattrs->{'id'} = $reading->id;
1046 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
1047 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
1050 # Add the real edges.
1051 my @edges = $self->paths;
1052 my( %substart, %subend );
1053 foreach my $edge ( @edges ) {
1054 # Do we need to output this edge?
1055 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
1056 my $label = $self->_path_display_label( $opts,
1057 $self->path_witnesses( $edge ) );
1058 my $variables = { %edge_attrs, 'label' => $label };
1060 # Account for the rank gap if necessary
1061 my $rank0 = $self->reading( $edge->[0] )->rank
1062 if $self->reading( $edge->[0] )->has_rank;
1063 my $rank1 = $self->reading( $edge->[1] )->rank
1064 if $self->reading( $edge->[1] )->has_rank;
1065 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1066 $variables->{'minlen'} = $rank1 - $rank0;
1069 # EXPERIMENTAL: make edge width reflect no. of witnesses
1070 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1071 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1073 my $varopts = _dot_attr_string( $variables );
1074 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1075 $edge->[0], $edge->[1], $varopts );
1076 } elsif( $used{$edge->[0]} ) {
1077 $subend{$edge->[0]} = $edge->[1];
1078 } elsif( $used{$edge->[1]} ) {
1079 $substart{$edge->[1]} = $edge->[0];
1083 # If we are asked to, add relationship links
1084 if( exists $opts->{show_relations} ) {
1085 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1086 if( $filter eq 'transposition' ) {
1087 $filter =~ qr/^transposition$/;
1090 my @types = sort( map { $_->name } $self->relations->types );
1091 if( exists $opts->{graphcolors} ) {
1092 foreach my $tdx ( 0 .. $#types ) {
1093 $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1096 map { $typecolors{$_} = '#FFA14F' } @types;
1098 foreach my $redge ( $self->relationships ) {
1099 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1100 my $rel = $self->get_relationship( $redge );
1101 next unless $filter eq 'all' || $rel->type =~ /$filter/;
1103 arrowhead => 'none',
1104 color => $typecolors{$rel->type},
1105 constraint => 'false',
1108 unless( exists $opts->{graphcolors} ) {
1109 $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
1111 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1112 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1117 # Add substitute start and end edges if necessary
1118 foreach my $node ( keys %substart ) {
1119 my $witstr = $self->_path_display_label( $opts,
1120 $self->path_witnesses( $substart{$node}, $node ) );
1121 my $variables = { %edge_attrs, 'label' => $witstr };
1122 my $nrdg = $self->reading( $node );
1123 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1124 # Substart is actually one lower than $startrank
1125 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1127 my $varopts = _dot_attr_string( $variables );
1128 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1130 foreach my $node ( keys %subend ) {
1131 my $witstr = $self->_path_display_label( $opts,
1132 $self->path_witnesses( $node, $subend{$node} ) );
1133 my $variables = { %edge_attrs, 'label' => $witstr };
1134 my $varopts = _dot_attr_string( $variables );
1135 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1138 if( $STRAIGHTENHACK ) {
1139 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1140 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1147 sub _dot_attr_string {
1150 foreach my $k ( sort keys %$hash ) {
1151 my $v = $hash->{$k};
1152 push( @attrs, $k.'="'.$v.'"' );
1154 return( '[ ' . join( ', ', @attrs ) . ' ]' );
1157 =head2 path_witnesses( $edge )
1159 Returns the list of sigils whose witnesses are associated with the given edge.
1160 The edge can be passed as either an array or an arrayref of ( $source, $target ).
1164 sub path_witnesses {
1165 my( $self, @edge ) = @_;
1166 # If edge is an arrayref, cope.
1167 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1168 my $e = shift @edge;
1171 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1175 # Helper function. Make a display label for the given witnesses, showing a.c.
1176 # witnesses only where the main witness is not also in the list.
1177 sub _path_display_label {
1181 map { $wits{$_} = 1 } @_;
1183 # If an a.c. wit is listed, remove it if the main wit is also listed.
1184 # Otherwise keep it for explicit listing.
1185 my $aclabel = $self->ac_label;
1187 foreach my $w ( sort keys %wits ) {
1188 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1189 if( exists $wits{$1} ) {
1192 push( @disp_ac, $w );
1197 if( $opts->{'explicit_wits'} ) {
1198 return join( ', ', sort keys %wits );
1200 # See if we are in a majority situation.
1201 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1202 $maj = $maj > 5 ? $maj : 5;
1203 if( scalar keys %wits > $maj ) {
1204 unshift( @disp_ac, 'majority' );
1205 return join( ', ', @disp_ac );
1207 return join( ', ', sort keys %wits );
1212 =head2 as_adjacency_list
1214 Returns a JSON structure that represents the collation sequence graph.
1218 use JSON qw/ from_json /;
1219 use Text::Tradition;
1221 my $t = Text::Tradition->new(
1223 'file' => 't/data/florilegium_graphml.xml' );
1224 my $c = $t->collation;
1226 # Make a connection so we can test rank preservation
1227 $c->add_relationship( 'w91', 'w92', { type => 'grammatical' } );
1229 # Create an adjacency list of the whole thing; test the output.
1230 my $adj_whole = from_json( $c->as_adjacency_list() );
1231 is( scalar @$adj_whole, scalar $c->readings(),
1232 "Same number of nodes in graph and adjacency list" );
1233 my @adj_whole_edges;
1234 map { push( @adj_whole_edges, @{$_->{adjacent}} ) } @$adj_whole;
1235 is( scalar @adj_whole_edges, scalar $c->sequence->edges,
1236 "Same number of edges in graph and adjacency list" );
1237 # Find the reading whose rank should be preserved
1238 my( $test_rdg ) = grep { $_->{id} eq 'w89' } @$adj_whole;
1239 my( $test_edge ) = grep { $_->{id} eq 'w92' } @{$test_rdg->{adjacent}};
1240 is( $test_edge->{minlen}, 2, "Rank of test reading is preserved" );
1242 # Now create an adjacency list of just a portion. w76 to w122
1243 my $adj_part = from_json( $c->as_adjacency_list(
1244 { from => $c->reading('w76')->rank,
1245 to => $c->reading('w122')->rank }));
1246 is( scalar @$adj_part, 48, "Correct number of nodes in partial graph" );
1248 map { push( @adj_part_edges, @{$_->{adjacent}} ) } @$adj_part;
1249 is( scalar @adj_part_edges, 58,
1250 "Same number of edges in partial graph and adjacency list" );
1251 # Check for consistency
1253 map { $part_nodes{$_->{id}} = 1 } @$adj_part;
1254 foreach my $edge ( @adj_part_edges ) {
1255 my $testid = $edge->{id};
1256 ok( $part_nodes{$testid}, "ID $testid referenced in edge is given as node" );
1263 sub as_adjacency_list {
1264 my( $self, $opts ) = @_;
1265 # Make a structure that contains all the nodes, the nodes they point to,
1266 # and the attributes of the edges that connect them.
1267 # [ { id: 'n0', label: 'Gallia', adjacent: [
1268 # { id: 'n1', label: 'P Q' } ,
1269 # { id: 'n2', label: 'R S', minlen: 2 } ] },
1270 # { id: 'n1', label: 'est', adjacent: [ ... ] },
1272 my $startrank = $opts->{'from'} || 0;
1273 my $endrank = $opts->{'to'} || $self->end->rank;
1275 $self->calculate_ranks()
1276 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
1278 foreach my $rdg ( $self->readings ) {
1281 # Figure out what the node's successors should be.
1282 if( $rdg eq $self->start && $startrank > 0 ) {
1283 # Connect the start node with all the nodes at startrank.
1284 # Lacunas should be included only if the node really has that rank.
1285 @successors = $self->readings_at_rank( $startrank, 1 );
1287 } elsif( $rdg->rank < $startrank
1288 || $rdg->rank > $endrank && $rdg ne $self->end ) {
1291 @successors = $rdg->successors;
1293 # Make sure that the end node is at the end of the successors
1294 # list if it is needed.
1295 if( grep { $_ eq $self->end } @successors ) {
1296 my @ts = grep { $_ ne $self->end } @successors;
1297 @successors = ( @ts, $self->end );
1298 } elsif ( grep { $_->rank > $endrank } @successors ) {
1299 push( @successors, $self->end );
1302 my $listitem = { id => $rdg->id, label => $rdg->text };
1305 foreach my $succ ( @successors ) {
1307 if( $phony eq 'start' ) {
1308 @edgewits = $succ->witnesses;
1309 } elsif( $self->sequence->has_edge( $rdg->id, $succ->id ) ) {
1310 @edgewits = $self->path_witnesses( $rdg->id, $succ->id );
1313 if( $succ eq $self->end ) {
1314 @edgewits = @endwits;
1315 } elsif( $succ->rank > $endrank ) {
1316 # These witnesses will point to 'end' instead, not to the
1318 push( @endwits, @edgewits );
1321 my $edgelabel = $self->_path_display_label( $opts, @edgewits );
1322 my $edgedef = { id => $succ->id, label => $edgelabel };
1323 my $rankoffset = $succ->rank - $rdg->rank;
1324 if( $rankoffset > 1 and $succ ne $self->end ) {
1325 $edgedef->{minlen} = $rankoffset;
1327 push( @$adjacent, $edgedef );
1329 $listitem->{adjacent} = $adjacent;
1330 push( @$list, $listitem );
1332 return to_json( $list );
1337 Returns a GraphML representation of the collation. The GraphML will contain
1338 two graphs. The first expresses the attributes of the readings and the witness
1339 paths that link them; the second expresses the relationships that link the
1340 readings. This is the native transfer format for a tradition.
1344 use Text::Tradition;
1350 my $datafile = 't/data/florilegium_tei_ps.xml';
1351 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1353 'file' => $datafile,
1356 ok( $tradition, "Got a tradition object" );
1357 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1358 ok( $tradition->collation, "Tradition has a collation" );
1360 my $c = $tradition->collation;
1361 is( scalar $c->readings, $READINGS, "Collation has all readings" );
1362 is( scalar $c->paths, $PATHS, "Collation has all paths" );
1363 is( scalar $c->relationships, 0, "Collation has all relationships" );
1365 # Add a few relationships
1366 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1367 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1368 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1369 'is_significant' => 'yes' } );
1371 # Now write it to GraphML and parse it again.
1373 my $graphml = $c->as_graphml;
1374 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1375 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1376 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1377 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1378 my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1379 is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
1381 # Now add a stemma, write to GraphML, and look at the output.
1383 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1384 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1385 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1386 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1387 $graphml = $c->as_graphml;
1388 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1395 ## TODO MOVE this to Tradition.pm and modularize it better
1397 my( $self, $options ) = @_;
1398 $self->calculate_ranks unless $self->_graphcalc_done;
1400 my $start = $options->{'from'}
1401 ? $self->reading( $options->{'from'} ) : $self->start;
1402 my $end = $options->{'to'}
1403 ? $self->reading( $options->{'to'} ) : $self->end;
1404 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1405 throw( 'Start node must be before end node' );
1407 # The readings need to be ranked for this to work.
1408 $start = $self->start unless $start->has_rank;
1409 $end = $self->end unless $end->has_rank;
1411 unless( $start eq $self->start ) {
1412 $rankoffset = $start->rank - 1;
1417 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1418 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1419 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1420 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1422 # Create the document and root node
1423 require XML::LibXML;
1424 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1425 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1426 $graphml->setDocumentElement( $root );
1427 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1428 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1430 # List of attribute types to save on our objects and their corresponding
1435 'Bool' => 'boolean',
1436 'ReadingID' => 'string',
1437 'RelationshipType' => 'string',
1438 'RelationshipScope' => 'string',
1439 'Ternary' => 'string',
1442 # Add the data keys for the graph. Include an extra key 'version' for the
1443 # GraphML output version.
1444 my %graph_data_keys;
1446 my %graph_attributes = ( 'version' => 'string' );
1447 # Graph attributes include those of Tradition and those of Collation.
1449 # TODO Use meta introspection method from duplicate_reading to do this
1450 # instead of naming custom keys.
1451 my $tmeta = $self->tradition->meta;
1452 my $cmeta = $self->meta;
1453 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1454 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1455 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1456 next if $attr->name =~ /^_/;
1457 next unless $save_types{$attr->type_constraint->name};
1458 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1460 # Extra custom keys for complex objects that should be saved in some form.
1461 # The subroutine should return a string, or undef/empty.
1462 if( $tmeta->has_method('stemmata') ) {
1463 $graph_attributes{'stemmata'} = sub {
1465 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1466 $self->tradition->stemmata;
1467 join( "\n", @stemstrs );
1471 if( $tmeta->has_method('user') ) {
1472 $graph_attributes{'user'} = sub {
1473 $self->tradition->user ? $self->tradition->user->id : undef
1477 foreach my $datum ( sort keys %graph_attributes ) {
1478 $graph_data_keys{$datum} = 'dg'.$gdi++;
1479 my $key = $root->addNewChild( $graphml_ns, 'key' );
1480 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1481 : $graph_attributes{$datum};
1482 $key->setAttribute( 'attr.name', $datum );
1483 $key->setAttribute( 'attr.type', $dtype );
1484 $key->setAttribute( 'for', 'graph' );
1485 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1488 # Add the data keys for reading nodes
1489 my %reading_attributes;
1490 my $rmeta = Text::Tradition::Collation::Reading->meta;
1491 foreach my $attr( $rmeta->get_all_attributes ) {
1492 next if $attr->name =~ /^_/;
1493 next unless $save_types{$attr->type_constraint->name};
1494 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1496 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1497 # Extra custom key for the reading morphology
1498 $reading_attributes{'lexemes'} = 'string';
1503 foreach my $datum ( sort keys %reading_attributes ) {
1504 $node_data_keys{$datum} = 'dn'.$ndi++;
1505 my $key = $root->addNewChild( $graphml_ns, 'key' );
1506 $key->setAttribute( 'attr.name', $datum );
1507 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1508 $key->setAttribute( 'for', 'node' );
1509 $key->setAttribute( 'id', $node_data_keys{$datum} );
1512 # Add the data keys for edges, that is, paths and relationships. Path
1513 # data does not come from a Moose class so is here manually.
1516 my %edge_attributes = (
1517 witness => 'string', # ID/label for a path
1518 extra => 'boolean', # Path key
1520 my @path_attributes = keys %edge_attributes; # track our manual additions
1521 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1522 foreach my $attr( $pmeta->get_all_attributes ) {
1523 next if $attr->name =~ /^_/;
1524 next unless $save_types{$attr->type_constraint->name};
1525 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1527 foreach my $datum ( sort keys %edge_attributes ) {
1528 $edge_data_keys{$datum} = 'de'.$edi++;
1529 my $key = $root->addNewChild( $graphml_ns, 'key' );
1530 $key->setAttribute( 'attr.name', $datum );
1531 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1532 $key->setAttribute( 'for', 'edge' );
1533 $key->setAttribute( 'id', $edge_data_keys{$datum} );
1536 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1537 my $xmlidname = $self->tradition->name;
1538 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1539 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1540 $xmlidname = '_'.$xmlidname;
1542 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1543 $sgraph->setAttribute( 'edgedefault', 'directed' );
1544 $sgraph->setAttribute( 'id', $xmlidname );
1545 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1546 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1547 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1548 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1549 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1551 # Tradition/collation attribute data
1552 foreach my $datum ( keys %graph_attributes ) {
1554 if( $datum eq 'version' ) {
1556 } elsif( ref( $graph_attributes{$datum} ) ) {
1557 my $sub = $graph_attributes{$datum};
1559 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1560 $value = $self->tradition->$datum;
1562 $value = $self->$datum;
1564 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1569 # Add our readings to the graph
1570 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1571 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1572 ( $n->rank < $start->rank || $n->rank > $end->rank );
1573 $use_readings{$n->id} = 1;
1574 # Add to the main graph
1575 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1576 my $node_xmlid = 'n' . $node_ctr++;
1577 $node_hash{ $n->id } = $node_xmlid;
1578 $node_el->setAttribute( 'id', $node_xmlid );
1579 foreach my $d ( keys %reading_attributes ) {
1581 # Custom serialization
1582 if( $d eq 'lexemes' ) {
1583 # If nval is a true value, we have lexemes so we need to
1584 # serialize them. Otherwise set nval to undef so that the
1585 # key is excluded from this reading.
1586 $nval = $nval ? $n->_serialize_lexemes : undef;
1587 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1590 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1591 # Adjust the ranks within the subgraph.
1592 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1593 : $nval - $rankoffset;
1595 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1600 # Add the path edges to the sequence graph
1602 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1603 # We add an edge in the graphml for every witness in $e.
1604 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1605 my @edge_wits = sort $self->path_witnesses( $e );
1606 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1607 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1608 # Skip any path from start to end; that witness is not in the subgraph.
1609 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1610 foreach my $wit ( @edge_wits ) {
1611 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1612 $node_hash{ $e->[0] },
1613 $node_hash{ $e->[1] } );
1614 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1615 $edge_el->setAttribute( 'source', $from );
1616 $edge_el->setAttribute( 'target', $to );
1617 $edge_el->setAttribute( 'id', $id );
1619 # It's a witness path, so add the witness
1621 my $key = $edge_data_keys{'witness'};
1622 # Is this an ante-corr witness?
1623 my $aclabel = $self->ac_label;
1624 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1625 # Keep the base witness
1627 # ...and record that this is an 'extra' reading path
1628 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1630 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1634 # Report the actual number of nodes and edges that went in
1635 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1636 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1638 # Add the relationship graph to the XML
1639 map { delete $edge_data_keys{$_} } @path_attributes;
1640 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1641 $node_data_keys{'id'}, \%edge_data_keys );
1643 # Save and return the thing
1644 my $result = decode_utf8( $graphml->toString(1) );
1648 sub _add_graphml_data {
1649 my( $el, $key, $value ) = @_;
1650 return unless defined $value;
1651 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1652 $data_el->setAttribute( 'key', $key );
1653 $data_el->appendText( $value );
1658 Returns a CSV alignment table representation of the collation graph, one
1659 row per witness (or witness uncorrected.)
1663 Returns a tab-separated alignment table representation of the collation graph,
1664 one row per witness (or witness uncorrected.)
1668 use Text::Tradition;
1676 my $datafile = 't/data/florilegium_tei_ps.xml';
1677 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1679 'file' => $datafile,
1682 my $c = $tradition->collation;
1683 # Export the thing to CSV
1684 my $csvstr = $c->as_csv();
1686 my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1687 my @lines = split(/\n/, $csvstr );
1688 ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1689 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1690 my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1691 ok( @q_ac, "Found a layered witness" );
1693 my $t2 = Text::Tradition->new( input => 'Tabular',
1697 is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1698 is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1700 # Now do it with TSV
1701 my $tsvstr = $c->as_tsv();
1702 my $t3 = Text::Tradition->new( input => 'Tabular',
1706 is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1707 is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1709 my $table = $c->alignment_table;
1710 my $noaccsv = $c->as_csv({ noac => 1 });
1711 my @noaclines = split(/\n/, $noaccsv );
1712 ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1713 is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1714 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1716 my $safecsv = $c->as_csv({ safe_ac => 1});
1717 my @safelines = split(/\n/, $safecsv );
1718 ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1719 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1720 @q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1721 ok( @q_ac, "Found a sanitized layered witness" );
1722 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1724 # Test relationship collapse
1725 $c->add_relationship( $c->readings_at_rank( 37 ), { type => 'spelling' } );
1726 $c->add_relationship( $c->readings_at_rank( 60 ), { type => 'spelling' } );
1728 my $mergedtsv = $c->as_tsv({mergetypes => [ 'spelling', 'orthographic' ] });
1729 my $t4 = Text::Tradition->new( input => 'Tabular',
1731 string => $mergedtsv,
1733 is( scalar $t4->collation->readings, $READINGS - 2, "Reparsed TSV merge collation has fewer readings" );
1734 is( scalar $t4->collation->paths, $PATHS - 4, "Reparsed TSV merge collation has fewer paths" );
1736 # Test non-ASCII sigla
1737 my $t5 = Text::Tradition->new( input => 'Tabular',
1739 file => 't/data/armexample.xlsx',
1741 my $awittsv = $t5->collation->as_tsv({ noac => 1, ascii => 1 });
1742 my @awitlines = split( /\n/, $awittsv );
1743 like( $awitlines[0], qr/_A_5315622/, "Found ASCII sigil variant in TSV" );
1750 my( $self, $opts ) = @_;
1751 my $table = $self->alignment_table( $opts );
1752 my $csv_options = { binary => 1, quote_null => 0 };
1753 $csv_options->{'sep_char'} = $opts->{fieldsep};
1754 if( $opts->{fieldsep} eq "\t" ) {
1755 # If it is really tab separated, nothing is an escape char.
1756 $csv_options->{'quote_char'} = undef;
1757 $csv_options->{'escape_char'} = '';
1759 my $csv = Text::CSV->new( $csv_options );
1762 # Make the header row
1763 my @witnesses = map { $_->{'witness'} } @{$table->{'alignment'}};
1764 if( $opts->{ascii} ) {
1765 # TODO think of a fix for this
1766 throw( "Cannot currently produce ASCII sigla with witness layers" )
1767 unless $opts->{noac};
1768 my @awits = map { $self->tradition->witness( $_ )->ascii_sigil } @witnesses;
1769 @witnesses = @awits;
1771 $csv->combine( @witnesses );
1772 push( @result, $csv->string );
1774 # Make the rest of the rows
1775 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1776 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1777 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1778 # Quick and dirty collapse of requested relationship types
1779 if( ref( $opts->{mergetypes} ) eq 'ARRAY' ) {
1780 # Now substitute the reading in the relevant index of @row
1781 # for its merge-related reading
1784 my $thisr = shift @rowobjs;
1786 next if exists $substitutes{$thisr->{t}->text};
1787 # Make sure we don't have A <-> B substitutions.
1788 $substitutes{$thisr->{t}->text} = $thisr->{t}->text;
1789 foreach my $thatr ( @rowobjs ) {
1791 next if exists $substitutes{$thatr->{t}->text};
1792 my $ttrel = $self->get_relationship( $thisr->{t}, $thatr->{t} );
1794 next unless grep { $ttrel->type eq $_ } @{$opts->{mergetypes}};
1795 # If we have got this far then we need to merge them.
1796 $substitutes{$thatr->{t}->text} = $thisr->{t}->text;
1799 @row = map { $_ && exists $substitutes{$_} ? $substitutes{$_} : $_ } @row;
1801 $csv->combine( @row );
1802 push( @result, $csv->string );
1804 return join( "\n", @result );
1809 my $opts = shift || {};
1810 $opts->{fieldsep} = ',';
1811 return $self->_tabular( $opts );
1816 my $opts = shift || {};
1817 $opts->{fieldsep} = "\t";
1818 return $self->_tabular( $opts );
1821 =head2 alignment_table
1823 Return a reference to an alignment table, in a slightly enhanced CollateX
1824 format which looks like this:
1826 $table = { alignment => [ { witness => "SIGIL",
1827 tokens => [ { t => "TEXT" }, ... ] },
1828 { witness => "SIG2",
1829 tokens => [ { t => "TEXT" }, ... ] },
1831 length => TEXTLEN };
1835 sub alignment_table {
1836 my( $self, $opts ) = @_;
1837 if( $self->has_cached_table ) {
1838 return $self->cached_table
1839 unless $opts->{noac} || $opts->{safe_ac};
1842 # Make sure we can do this
1843 throw( "Need a linear graph in order to make an alignment table" )
1844 unless $self->linear;
1845 $self->calculate_ranks()
1846 unless $self->_graphcalc_done && $self->end->has_rank;
1848 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1849 my @all_pos = ( 1 .. $self->end->rank - 1 );
1850 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1851 # say STDERR "Making witness row(s) for " . $wit->sigil;
1852 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1853 my @row = _make_witness_row( \@wit_path, \@all_pos );
1854 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1855 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1856 push( @{$table->{'alignment'}}, $witobj );
1857 if( $wit->is_layered && !$opts->{noac} ) {
1858 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1859 $wit->sigil.$self->ac_label );
1860 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1861 my $witlabel = $opts->{safe_ac}
1862 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1863 my $witacobj = { 'witness' => $witlabel,
1864 'tokens' => \@ac_row };
1865 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1866 push( @{$table->{'alignment'}}, $witacobj );
1869 unless( $opts->{noac} || $opts->{safe_ac} ) {
1870 $self->cached_table( $table );
1875 sub _make_witness_row {
1876 my( $path, $positions ) = @_;
1878 map { $char_hash{$_} = undef } @$positions;
1880 foreach my $rdg ( @$path ) {
1881 say STDERR "rank " . $rdg->rank if $debug;
1882 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1883 $char_hash{$rdg->rank} = { 't' => $rdg };
1885 my @row = map { $char_hash{$_} } @$positions;
1886 # Fill in lacuna markers for undef spots in the row
1887 my $last_el = shift @row;
1888 my @filled_row = ( $last_el );
1889 foreach my $el ( @row ) {
1890 # If we are using node reference, make the lacuna node appear many times
1891 # in the table. If not, use the lacuna tag.
1892 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1895 push( @filled_row, $el );
1902 =head1 NAVIGATION METHODS
1904 =head2 reading_sequence( $first, $last, $sigil, $backup )
1906 Returns the ordered list of readings, starting with $first and ending
1907 with $last, for the witness given in $sigil. If a $backup sigil is
1908 specified (e.g. when walking a layered witness), it will be used wherever
1909 no $sigil path exists. If there is a base text reading, that will be
1910 used wherever no path exists for $sigil or $backup.
1914 # TODO Think about returning some lazy-eval iterator.
1915 # TODO Get rid of backup; we should know from what witness is whether we need it.
1917 sub reading_sequence {
1918 my( $self, $start, $end, $witness ) = @_;
1920 $witness = $self->baselabel unless $witness;
1921 my @readings = ( $start );
1924 while( $n && $n->id ne $end->id ) {
1925 if( exists( $seen{$n->id} ) ) {
1926 throw( "Detected loop for $witness at " . $n->id );
1930 my $next = $self->next_reading( $n, $witness );
1932 throw( "Did not find any path for $witness from reading " . $n->id );
1934 push( @readings, $next );
1937 # Check that the last reading is our end reading.
1938 my $last = $readings[$#readings];
1939 throw( "Last reading found from " . $start->text .
1940 " for witness $witness is not the end!" ) # TODO do we get this far?
1941 unless $last->id eq $end->id;
1946 =head2 readings_at_rank( $rank )
1948 Returns a list of readings at a given rank, taken from the alignment table.
1952 sub readings_at_rank {
1953 my( $self, $rank, $nolacuna ) = @_;
1954 my $table = $self->alignment_table;
1955 # Table rank is real rank - 1.
1956 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1958 foreach my $e ( @elements ) {
1959 next unless ref( $e ) eq 'HASH';
1960 next unless exists $e->{'t'};
1961 my $rdg = $e->{'t'};
1962 next if $nolacuna && $rdg->is_lacuna && $rdg->rank ne $rank;
1963 $readings{$e->{'t'}->id} = $e->{'t'};
1965 return values %readings;
1968 =head2 next_reading( $reading, $sigil );
1970 Returns the reading that follows the given reading along the given witness
1976 # Return the successor via the corresponding path.
1978 my $answer = $self->_find_linked_reading( 'next', @_ );
1979 return undef unless $answer;
1980 return $self->reading( $answer );
1983 =head2 prior_reading( $reading, $sigil )
1985 Returns the reading that precedes the given reading along the given witness
1991 # Return the predecessor via the corresponding path.
1993 my $answer = $self->_find_linked_reading( 'prior', @_ );
1994 return $self->reading( $answer );
1997 sub _find_linked_reading {
1998 my( $self, $direction, $node, $path ) = @_;
2000 # Get a backup if we are dealing with a layered witness
2002 my $aclabel = $self->ac_label;
2003 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
2007 my @linked_paths = $direction eq 'next'
2008 ? $self->sequence->edges_from( $node )
2009 : $self->sequence->edges_to( $node );
2010 return undef unless scalar( @linked_paths );
2012 # We have to find the linked path that contains all of the
2013 # witnesses supplied in $path.
2014 my( @path_wits, @alt_path_wits );
2015 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
2016 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
2019 foreach my $le ( @linked_paths ) {
2020 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
2023 my @le_wits = sort $self->path_witnesses( $le );
2024 if( _is_within( \@path_wits, \@le_wits ) ) {
2025 # This is the right path.
2026 return $direction eq 'next' ? $le->[1] : $le->[0];
2027 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
2031 # Got this far? Return the alternate path if it exists.
2032 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
2035 # Got this far? Return the base path if it exists.
2036 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
2039 # Got this far? We have no appropriate path.
2040 warn "Could not find $direction node from " . $node->id
2041 . " along path $path";
2047 my( $set1, $set2 ) = @_;
2048 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
2049 foreach my $el ( @$set1 ) {
2050 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
2055 # Return the string that joins together a list of witnesses for
2056 # display on a single path.
2057 sub _witnesses_of_label {
2058 my( $self, $label ) = @_;
2059 my $regex = $self->wit_list_separator;
2060 my @answer = split( /\Q$regex\E/, $label );
2064 =head2 common_readings
2066 Returns the list of common readings in the graph (i.e. those readings that are
2067 shared by all non-lacunose witnesses.)
2071 sub common_readings {
2073 my @common = grep { $_->is_common } $self->readings;
2077 =head2 path_text( $sigil, [, $start, $end ] )
2079 Returns the text of a witness (plus its backup, if we are using a layer)
2080 as stored in the collation. The text is returned as a string, where the
2081 individual readings are joined with spaces and the meta-readings (e.g.
2082 lacunae) are omitted. Optional specification of $start and $end allows
2083 the generation of a subset of the witness text.
2088 my( $self, $wit, $start, $end ) = @_;
2089 $start = $self->start unless $start;
2090 $end = $self->end unless $end;
2091 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
2094 foreach my $r ( @path ) {
2095 unless ( $r->join_prior || !$last || $last->join_next ) {
2098 $pathtext .= $r->text;
2104 =head1 INITIALIZATION METHODS
2106 These are mostly for use by parsers.
2108 =head2 make_witness_path( $witness )
2110 Link the array of readings contained in $witness->path (and in
2111 $witness->uncorrected_path if it exists) into collation paths.
2112 Clear out the arrays when finished.
2114 =head2 make_witness_paths
2116 Call make_witness_path for all witnesses in the tradition.
2120 # For use when a collation is constructed from a base text and an apparatus.
2121 # We have the sequences of readings and just need to add path edges.
2122 # When we are done, clear out the witness path attributes, as they are no
2124 # TODO Find a way to replace the witness path attributes with encapsulated functions?
2126 sub make_witness_paths {
2128 foreach my $wit ( $self->tradition->witnesses ) {
2129 # say STDERR "Making path for " . $wit->sigil;
2130 $self->make_witness_path( $wit );
2134 sub make_witness_path {
2135 my( $self, $wit ) = @_;
2136 my @chain = @{$wit->path};
2137 my $sig = $wit->sigil;
2138 # Add start and end if necessary
2139 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2140 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2141 foreach my $idx ( 0 .. $#chain-1 ) {
2142 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
2144 if( $wit->is_layered ) {
2145 @chain = @{$wit->uncorrected_path};
2146 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2147 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2148 foreach my $idx( 0 .. $#chain-1 ) {
2149 my $source = $chain[$idx];
2150 my $target = $chain[$idx+1];
2151 $self->add_path( $source, $target, $sig.$self->ac_label )
2152 unless $self->has_path( $source, $target, $sig );
2156 $wit->clear_uncorrected_path;
2159 =head2 calculate_ranks
2161 Calculate the reading ranks (that is, their aligned positions relative
2162 to each other) for the graph. This can only be called on linear collations.
2166 use Text::Tradition;
2168 my $cxfile = 't/data/Collatex-16.xml';
2169 my $t = Text::Tradition->new(
2171 'input' => 'CollateX',
2174 my $c = $t->collation;
2177 my $table = $c->alignment_table;
2178 ok( $c->has_cached_table, "Alignment table was cached" );
2179 is( $c->alignment_table, $table, "Cached table returned upon second call" );
2180 $c->calculate_ranks;
2181 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
2182 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2183 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2184 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2185 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
2191 sub calculate_ranks {
2193 # Save the existing ranks, in case we need to invalidate the cached SVG.
2194 throw( "Cannot calculate ranks on a non-linear graph" )
2195 unless $self->linear;
2197 map { $existing_ranks{$_} = $_->rank } $self->readings;
2199 # Do the rankings based on the relationship equivalence graph, starting
2200 # with the start node.
2201 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2203 # Transfer our rankings from the topological graph to the real one.
2204 foreach my $r ( $self->readings ) {
2205 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
2206 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
2208 # Die. Find the last rank we calculated.
2209 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2210 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
2212 my $last = pop @all_defined;
2213 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
2216 # Do we need to invalidate the cached data?
2217 if( $self->has_cached_table ) {
2218 foreach my $r ( $self->readings ) {
2219 next if defined( $existing_ranks{$r} )
2220 && $existing_ranks{$r} == $r->rank;
2221 # Something has changed, so clear the cache
2222 $self->_clear_cache;
2223 # ...and recalculate the common readings.
2224 $self->calculate_common_readings();
2228 # The graph calculation information is now up to date.
2229 $self->_graphcalc_done(1);
2234 $self->wipe_table if $self->has_cached_table;
2238 =head2 flatten_ranks
2240 A convenience method for parsing collation data. Searches the graph for readings
2241 with the same text at the same rank, and merges any that are found.
2246 my ( $self, %args ) = shift;
2247 my %unique_rank_rdg;
2249 foreach my $p ( $self->identical_readings( %args ) ) {
2250 # say STDERR "Combining readings at same rank: @$p";
2252 $self->merge_readings( @$p );
2253 # TODO see if this now makes a common point.
2255 # If we merged readings, the ranks are still fine but the alignment
2256 # table is wrong. Wipe it.
2257 $self->wipe_table() if $changed;
2260 =head2 identical_readings
2261 =head2 identical_readings( start => $startnode, end => $endnode )
2262 =head2 identical_readings( startrank => $startrank, endrank => $endrank )
2264 Goes through the graph identifying all pairs of readings that appear to be
2265 identical, and therefore able to be merged into a single reading. Returns the
2266 relevant identical pairs. Can be restricted to run over only a part of the
2267 graph, specified either by node or by rank.
2271 sub identical_readings {
2272 my ( $self, %args ) = @_;
2273 # Find where we should start and end.
2274 my $startrank = $args{startrank} || 0;
2275 if( $args{start} ) {
2276 throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2277 && $self->reading( $args{start} )->has_rank;
2278 $startrank = $self->reading( $args{start} )->rank;
2280 my $endrank = $args{endrank} || $self->end->rank;
2282 throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2283 && $self->reading( $args{end} )->has_rank;
2284 $endrank = $self->reading( $args{end} )->rank;
2287 # Make sure the ranks are correct.
2288 unless( $self->_graphcalc_done ) {
2289 $self->calculate_ranks;
2291 # Go through the readings looking for duplicates.
2292 my %unique_rank_rdg;
2294 foreach my $rdg ( $self->readings ) {
2295 next unless $rdg->has_rank;
2296 my $rk = $rdg->rank;
2297 next if $rk > $endrank || $rk < $startrank;
2298 my $key = $rk . "||" . $rdg->text;
2299 if( exists $unique_rank_rdg{$key} ) {
2300 # Make sure they don't have different grammatical forms
2301 my $ur = $unique_rank_rdg{$key};
2302 if( $rdg->is_identical( $ur ) ) {
2303 push( @pairs, [ $ur, $rdg ] );
2306 $unique_rank_rdg{$key} = $rdg;
2314 =head2 calculate_common_readings
2316 Goes through the graph identifying the readings that appear in every witness
2317 (apart from those with lacunae at that spot.) Marks them as common and returns
2322 use Text::Tradition;
2324 my $cxfile = 't/data/Collatex-16.xml';
2325 my $t = Text::Tradition->new(
2327 'input' => 'CollateX',
2330 my $c = $t->collation;
2332 my @common = $c->calculate_common_readings();
2333 is( scalar @common, 8, "Found correct number of common readings" );
2334 my @marked = sort $c->common_readings();
2335 is( scalar @common, 8, "All common readings got marked as such" );
2336 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2337 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2343 sub calculate_common_readings {
2346 map { $_->is_common( 0 ) } $self->readings;
2347 # Implicitly calls calculate_ranks
2348 my $table = $self->alignment_table;
2349 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2350 my @row = map { $_->{'tokens'}->[$idx]
2351 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2352 @{$table->{'alignment'}};
2354 foreach my $r ( @row ) {
2356 $hash{$r->id} = $r unless $r->is_meta;
2358 $hash{'UNDEF'} = $r;
2361 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2362 my( $r ) = values %hash;
2364 push( @common, $r );
2370 =head2 text_from_paths
2372 Calculate the text array for all witnesses from the path, for later consistency
2373 checking. Only to be used if there is no non-graph-based way to know the
2378 sub text_from_paths {
2380 foreach my $wit ( $self->tradition->witnesses ) {
2381 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2383 foreach my $r ( @readings ) {
2384 next if $r->is_meta;
2385 push( @text, $r->text );
2387 $wit->text( \@text );
2388 if( $wit->is_layered ) {
2389 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2390 $wit->sigil.$self->ac_label );
2392 foreach my $r ( @ucrdgs ) {
2393 next if $r->is_meta;
2394 push( @uctext, $r->text );
2396 $wit->layertext( \@uctext );
2401 =head1 UTILITY FUNCTIONS
2403 =head2 common_predecessor( $reading_a, $reading_b )
2405 Find the last reading that occurs in sequence before both the given readings.
2406 At the very least this should be $self->start.
2408 =head2 common_successor( $reading_a, $reading_b )
2410 Find the first reading that occurs in sequence after both the given readings.
2411 At the very least this should be $self->end.
2415 use Text::Tradition;
2417 my $cxfile = 't/data/Collatex-16.xml';
2418 my $t = Text::Tradition->new(
2420 'input' => 'CollateX',
2423 my $c = $t->collation;
2425 is( $c->common_predecessor( 'n24', 'n23' )->id,
2426 'n20', "Found correct common predecessor" );
2427 is( $c->common_successor( 'n24', 'n23' )->id,
2428 '__END__', "Found correct common successor" );
2430 is( $c->common_predecessor( 'n19', 'n17' )->id,
2431 'n16', "Found correct common predecessor for readings on same path" );
2432 is( $c->common_successor( 'n21', 'n10' )->id,
2433 '__END__', "Found correct common successor for readings on same path" );
2439 ## Return the closest reading that is a predecessor of both the given readings.
2440 sub common_predecessor {
2442 my( $r1, $r2 ) = $self->_objectify_args( @_ );
2443 return $self->_common_in_path( $r1, $r2, 'predecessors' );
2446 sub common_successor {
2448 my( $r1, $r2 ) = $self->_objectify_args( @_ );
2449 return $self->_common_in_path( $r1, $r2, 'successors' );
2453 # TODO think about how to do this without ranks...
2454 sub _common_in_path {
2455 my( $self, $r1, $r2, $dir ) = @_;
2456 my $iter = $self->end->rank;
2458 my @last_r1 = ( $r1 );
2459 my @last_r2 = ( $r2 );
2460 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2462 # say STDERR "Finding common $dir for $r1, $r2";
2463 while( !@candidates ) {
2464 last unless $iter--; # Avoid looping infinitely
2465 # Iterate separately down the graph from r1 and r2
2466 my( @new_lc1, @new_lc2 );
2467 foreach my $lc ( @last_r1 ) {
2468 foreach my $p ( $lc->$dir ) {
2469 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2470 # say STDERR "Path candidate $p from $lc";
2471 push( @candidates, $p );
2472 } elsif( !$all_seen{$p->id} ) {
2473 $all_seen{$p->id} = 'r1';
2474 push( @new_lc1, $p );
2478 foreach my $lc ( @last_r2 ) {
2479 foreach my $p ( $lc->$dir ) {
2480 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2481 # say STDERR "Path candidate $p from $lc";
2482 push( @candidates, $p );
2483 } elsif( !$all_seen{$p->id} ) {
2484 $all_seen{$p->id} = 'r2';
2485 push( @new_lc2, $p );
2489 @last_r1 = @new_lc1;
2490 @last_r2 = @new_lc2;
2492 my @answer = sort { $a->rank <=> $b->rank } @candidates;
2493 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2497 Text::Tradition::Error->throw(
2498 'ident' => 'Collation error',
2504 __PACKAGE__->meta->make_immutable;
2510 =item * Rework XML serialization in a more modular way
2516 This package is free software and is provided "as is" without express
2517 or implied warranty. You can redistribute it and/or modify it under
2518 the same terms as Perl itself.
2522 Tara L Andrews E<lt>aurum@cpan.orgE<gt>