lib/Text/Tradition/Collation.pm

   1 package Text::Tradition::Collation;
   2
   3 use Graph::Easy;
   4 use IPC::Run qw( run binary );
   5 use Text::Tradition::Collation::Reading;
   6 use Text::Tradition::Collation::Path;
   7 use XML::LibXML;
   8 use Moose;
   9
  10 has 'graph' => (
  11     is => 'ro',
  12     isa => 'Graph::Easy',
  13     handles => {
  14         add_reading => 'add_node',
  15         del_reading => 'del_node',
  16         add_path => 'add_edge',
  17         del_path => 'del_edge',
  18         reading => 'node',
  19         path => 'edge',
  20         readings => 'nodes',
  21         paths => 'edges',
  22         relationships => 'edges',
  23     },
  24     default => sub { Graph::Easy->new( undirected => 0 ) },
  25     );
  26
  27
  28 has 'tradition' => (
  29     is => 'rw',
  30     isa => 'Text::Tradition',
  31     );
  32
  33 has 'svg' => (
  34     is => 'ro',
  35     isa => 'Str',
  36     writer => '_save_svg',
  37     predicate => 'has_svg',
  38     );
  39
  40 has 'graphml' => (
  41     is => 'ro',
  42     isa => 'Str',
  43     writer => '_save_graphml',
  44     predicate => 'has_graphml',
  45     );
  46
  47 # Keeps track of the lemmas within the collation.  At most one lemma
  48 # per position in the graph.
  49 has 'lemmata' => (
  50     is => 'ro',
  51     isa => 'HashRef[Maybe[Str]]',
  52     default => sub { {} },
  53     );
  54
  55 has 'wit_list_separator' => (
  56     is => 'rw',
  57     isa => 'Str',
  58     default => ', ',
  59     );
  60
  61 has 'baselabel' => (
  62     is => 'rw',
  63     isa => 'Str',
  64     default => 'base text',
  65     );
  66
  67 has 'collapsed' => (
  68     is => 'rw',
  69     isa => 'Bool',
  70     );
  71
  72 has 'linear' => (
  73     is => 'rw',
  74     isa => 'Bool',
  75     default => 1,
  76     );
  77
  78
  79 # The collation can be created two ways:
  80 # 1. Collate a set of witnesses (with CollateX I guess) and process
  81 #    the results as in 2.
  82 # 2. Read a pre-prepared collation in one of a variety of formats,
  83 #    and make the graph from that.
  84
  85 # The graph itself will (for now) be immutable, and the positions
  86 # within the graph will also be immutable.  We need to calculate those
  87 # positions upon graph construction.  The equivalences between graph
  88 # nodes will be mutable, entirely determined by the user (or possibly
  89 # by some semantic pre-processing provided by the user.)  So the
  90 # constructor should just make an empty equivalences object.  The
  91 # constructor will also need to make the witness objects, if we didn't
  92 # come through option 1.
  93
  94 sub BUILD {
  95     my( $self, $args ) = @_;
  96     $self->graph->use_class('node', 'Text::Tradition::Collation::Reading');
  97     $self->graph->use_class('edge', 'Text::Tradition::Collation::Path');
  98
  99     # Pass through any graph-specific options.
 100     my $shape = exists( $args->{'shape'} ) ? $args->{'shape'} : 'ellipse';
 101     $self->graph->set_attribute( 'node', 'shape', $shape );
 102 }
 103
 104 # Wrapper around add_path
 105
 106 around add_path => sub {
 107     my $orig = shift;
 108     my $self = shift;
 109
 110     # Make sure there are three arguments
 111     unless( @_ == 3 ) {
 112         warn "Call add_path with args source, target, witness";
 113         return;
 114     }
 115     # Make sure the proposed path does not yet exist
 116     my( $source, $target, $wit ) = @_;
 117     $source = $self->reading( $source )
 118         unless ref( $source ) eq 'Text::Tradition::Collation::Reading';
 119     $target = $self->reading( $target )
 120         unless ref( $target ) eq 'Text::Tradition::Collation::Reading';
 121     foreach my $path ( $source->edges_to( $target ) ) {
 122         if( $path->label eq $wit ) {
 123             return;
 124         }
 125     }
 126     # Do the deed
 127     $self->$orig( @_ );
 128 };
 129
 130 # Wrapper around paths
 131 around paths => sub {
 132     my $orig = shift;
 133     my $self = shift;
 134
 135     my @result = grep { $_->class eq 'edge.path' } $self->$orig( @_ );
 136     return @result;
 137 };
 138
 139 around relationships => sub {
 140     my $orig = shift;
 141     my $self = shift;
 142     my @result = grep { $_->class eq 'edge.relationship' } $self->$orig( @_ );
 143     return @result;
 144 };
 145
 146 # Wrapper around merge_nodes
 147
 148 sub merge_readings {
 149     my $self = shift;
 150     my $first_node = shift;
 151     my $second_node = shift;
 152     $first_node->merge_from( $second_node );
 153     unshift( @_, $first_node, $second_node );
 154     return $self->graph->merge_nodes( @_ );
 155 }
 156
 157 # Extra graph-alike utility
 158 sub has_path {
 159     my( $self, $source, $target, $label ) = @_;
 160     my @paths = $source->edges_to( $target );
 161     my @relevant = grep { $_->label eq $label } @paths;
 162     return scalar @paths;
 163 }
 164
 165 ## Dealing with relationships between readings.  This is a different
 166 ## sort of graph edge.
 167
 168 sub add_relationship {
 169     my( $self, $type, $source, $target, $global ) = @_;
 170     my $rel = Text::Tradition::Collation::Relationship->new(
 171             'sort' => $type,
 172             'global' => $global,
 173             'orig_relation' => [ $source, $target ],
 174     );
 175     print STDERR sprintf( "Setting relationship %s between readings %s (%s)"
 176                           . " and %s (%s)\n", $type,
 177                           $source->label, $source->name,
 178                           $target->label, $target->name );
 179     $self->graph->add_edge( $source, $target, $rel );
 180     if( $global ) {
 181         # Look for all readings with the source label, and if there are
 182         # colocated readings with the target label, join them too.
 183         foreach my $r ( $self->readings() ) {
 184             next unless $r->label eq $source->label;
 185             my @colocated = grep { $_->label eq $target->label }
 186                 $self->same_position_as( $r );
 187             if( @colocated ) {
 188                 warn "Multiple readings with same label at same position!"
 189                     if @colocated > 1;
 190                 my $dup_rel = Text::Tradition::Collation::Relationship->new(
 191                     'sort' => $type,
 192                     'global' => $global,
 193                     'orig_relation' => [ $source, $target ],
 194                     );
 195                 $self->graph->add_edge( $r, $colocated[0], $dup_rel );
 196             }
 197         }
 198     }
 199 }
 200
 201 =head2 Output method(s)
 202
 203 =over
 204
 205 =item B<as_svg>
 206
 207 print $graph->as_svg( $recalculate );
 208
 209 Returns an SVG string that represents the graph.  Uses GraphViz to do
 210 this, because Graph::Easy doesn\'t cope well with long graphs. Unless
 211 $recalculate is passed (and is a true value), the method will return a
 212 cached copy of the SVG after the first call to the method.
 213
 214 =cut
 215
 216 sub as_svg {
 217     my( $self, $recalc ) = @_;
 218     return $self->svg if $self->has_svg;
 219
 220     $self->collapse_graph_paths();
 221
 222     my @cmd = qw/dot -Tsvg/;
 223     my( $svg, $err );
 224     my $in = $self->as_dot();
 225     run( \@cmd, \$in, ">", binary(), \$svg );
 226     $self->_save_svg( $svg );
 227     $self->expand_graph_paths();
 228     return $svg;
 229 }
 230
 231 =item B<as_dot>
 232
 233 print $graph->as_dot( $view, $recalculate );
 234
 235 Returns a string that is the collation graph expressed in dot
 236 (i.e. GraphViz) format.  The 'view' argument determines what kind of
 237 graph is produced.
 238     * 'path': a graph of witness paths through the collation (DEFAULT)
 239     * 'relationship': a graph of how collation readings relate to
 240       each other
 241
 242 =cut
 243
 244 sub as_dot {
 245     my( $self, $view ) = @_;
 246     $view = 'path' unless $view;
 247     # TODO consider making some of these things configurable
 248     my $dot = sprintf( "digraph %s {\n", $self->tradition->name );
 249     $dot .= "\tedge [ arrowhead=open ];\n";
 250     $dot .= "\tgraph [ rankdir=LR ];\n";
 251     $dot .= sprintf( "\tnode [ fontsize=%d, fillcolor=%s, style=%s, shape=%s ];\n",
 252                      11, "white", "filled", $self->graph->get_attribute( 'node', 'shape' ) );
 253
 254     foreach my $reading ( $self->readings ) {
 255         next if $reading->name eq $reading->label;
 256         $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ]\n", $reading->name, $reading->label );
 257     }
 258
 259     my @edges = $view eq 'relationship' ? $self->relationships : $self->paths;
 260     foreach my $edge ( @edges ) {
 261         $dot .= sprintf( "\t\"%s\" -> \"%s\" [ color=\"%s\", fontcolor=\"%s\", label=\"%s\" ]\n",
 262                          $edge->from->name, $edge->to->name, '#000000', '#000000', $edge->label );
 263     }
 264
 265     $dot .= "}\n";
 266     return $dot;
 267 }
 268
 269 =item B<as_graphml>
 270
 271 print $graph->as_graphml( $recalculate )
 272
 273 Returns a GraphML representation of the collation graph, with
 274 transposition information and position information. Unless
 275 $recalculate is passed (and is a true value), the method will return a
 276 cached copy of the SVG after the first call to the method.
 277
 278 =cut
 279
 280 sub as_graphml {
 281     my( $self, $recalc ) = @_;
 282     return $self->graphml if $self->has_graphml;
 283
 284     # Some namespaces
 285     my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
 286     my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
 287     my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
 288         'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
 289
 290     # Create the document and root node
 291     my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
 292     my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
 293     $graphml->setDocumentElement( $root );
 294     $root->setNamespace( $xsi_ns, 'xsi', 0 );
 295     $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
 296
 297     # Add the data keys for nodes
 298     my @node_data = ( 'name', 'reading', 'identical', 'position' );
 299     foreach my $ndi ( 0 .. $#node_data ) {
 300         my $key = $root->addNewChild( $graphml_ns, 'key' );
 301         $key->setAttribute( 'attr.name', $node_data[$ndi] );
 302         $key->setAttribute( 'attr.type', 'string' );
 303         $key->setAttribute( 'for', 'node' );
 304         $key->setAttribute( 'id', 'd'.$ndi );
 305     }
 306
 307     # Add the data keys for edges, i.e. witnesses
 308     my $wit_ctr = 0;
 309     foreach my $wit_key( qw/ main ante_corr / ) {
 310         my $key = $root->addNewChild( $graphml_ns, 'key' );
 311         $key->setAttribute( 'attr.name', "witness_$wit_key" );
 312         $key->setAttribute( 'attr.type', 'string' );
 313         $key->setAttribute( 'for', 'edge' );
 314         $key->setAttribute( 'id', 'w'.$wit_ctr++ );
 315     }
 316
 317     # Add the graph, its nodes, and its edges
 318     my $graph = $root->addNewChild( $graphml_ns, 'graph' );
 319     $graph->setAttribute( 'edgedefault', 'directed' );
 320     $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful
 321     $graph->setAttribute( 'parse.edgeids', 'canonical' );
 322     $graph->setAttribute( 'parse.edges', scalar($self->paths) );
 323     $graph->setAttribute( 'parse.nodeids', 'canonical' );
 324     $graph->setAttribute( 'parse.nodes', scalar($self->readings) );
 325     $graph->setAttribute( 'parse.order', 'nodesfirst' );
 326
 327     my $node_ctr = 0;
 328     my %node_hash;
 329     foreach my $n ( sort { $a->name cmp $b->name } $self->readings ) {
 330         my %this_node_data = ();
 331         foreach my $ndi ( 0 .. $#node_data ) {
 332             my $key = $node_data[$ndi];
 333             if( $key eq 'name' ) {
 334                 $this_node_data{'d'.$ndi} = $n->name;
 335             } elsif( $key eq 'token' ) {
 336                 $this_node_data{'d'.$ndi} = $n->label;
 337             } elsif( $key eq 'identical' && $n->has_primary ) {
 338                 $this_node_data{'d'.$ndi} = $n->primary->name;
 339             } elsif( $key eq 'position' ) {
 340                 $this_node_data{'d'.$ndi} = $n->position;
 341             }
 342         }
 343         my $node_el = $graph->addNewChild( $graphml_ns, 'node' );
 344         my $node_xmlid = 'n' . $node_ctr++;
 345         $node_hash{ $n->name } = $node_xmlid;
 346         $node_el->setAttribute( 'id', $node_xmlid );
 347
 348         foreach my $dk ( keys %this_node_data ) {
 349             my $d_el = $node_el->addNewChild( $graphml_ns, 'data' );
 350             $d_el->setAttribute( 'key', $dk );
 351             $d_el->appendText( $this_node_data{$dk} );
 352         }
 353     }
 354
 355     my $edge_ctr = 0;
 356     foreach my $e ( sort { $a->from->name cmp $b->from->name } $self->paths() ) {
 357         my( $name, $from, $to ) = ( 'e'.$edge_ctr++,
 358                                     $node_hash{ $e->from->name() },
 359                                     $node_hash{ $e->to->name() } );
 360         my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' );
 361         $edge_el->setAttribute( 'source', $from );
 362         $edge_el->setAttribute( 'target', $to );
 363         $edge_el->setAttribute( 'id', $name );
 364         # Add the witness
 365         my $base = $e->label;
 366         my $key = 'w0';
 367         # TODO kind of hacky
 368         if( $e->label =~ /^(.*?)\s+(\(a\.c\.\))$/ ) {
 369             $base = $1;
 370             $key = 'w1';
 371         }
 372         my $wit_el = $edge_el->addNewChild( $graphml_ns, 'data' );
 373         $wit_el->setAttribute( 'key', $key );
 374         $wit_el->appendText( $base );
 375     }
 376
 377     # Return the thing
 378     $self->_save_graphml( $graphml->toString(1) );
 379     return $graphml->toString(1);
 380 }
 381
 382 sub _make_xml_attr {
 383     my $str = shift;
 384     $str =~ s/\s/_/g;
 385     $str =~ s/\W//g;
 386     $str =~ "a$str" if $str =~ /^\d/;
 387     return $str;
 388 }
 389
 390 sub collapse_graph_paths {
 391     my $self = shift;
 392     # Our collation graph has an path per witness.  This is great for
 393     # calculation purposes, but terrible for display.  Thus we want to
 394     # display only one path between any two nodes.
 395
 396     return if $self->collapsed;
 397
 398     print STDERR "Collapsing witness paths in graph...\n";
 399
 400     # Don't list out every witness if we have more than half to list.
 401     my $majority = int( scalar( @{$self->tradition->witnesses} ) / 2 ) + 1;
 402     # But don't compress if there are only a few witnesses.
 403     $majority = 4 if $majority < 4;
 404     foreach my $node( $self->readings ) {
 405         my $newlabels = {};
 406         # We will visit each node, so we only look ahead.
 407         foreach my $edge ( $node->outgoing() ) {
 408             next unless $edge->class eq 'edge.path';
 409             add_hash_entry( $newlabels, $edge->to->name, $edge->name );
 410             $self->del_path( $edge );
 411         }
 412
 413         foreach my $newdest ( keys %$newlabels ) {
 414             my $label;
 415             my @compressed_wits = ();
 416             if( @{$newlabels->{$newdest}} < $majority ) {
 417                 $label = join( ', ', sort( @{$newlabels->{$newdest}} ) );
 418             } else {
 419                 ## TODO FIX THIS HACK
 420                 my @aclabels;
 421                 foreach my $wit ( @{$newlabels->{$newdest}} ) {
 422                     if( $wit =~ /^(.*?)(\s*\(?a\.\s*c\.\)?)$/ ) {
 423                         push( @aclabels, $wit );
 424                     } else {
 425                         push( @compressed_wits, $wit );
 426                     }
 427                 }
 428                 $label = join( ', ', 'majority', sort( @aclabels ) );
 429             }
 430
 431             my $newpath =
 432                 $self->add_path( $node, $self->reading( $newdest ), $label );
 433             if( @compressed_wits ) {
 434                 $newpath->hidden_witnesses( \@compressed_wits );
 435             }
 436         }
 437     }
 438
 439     $self->collapsed( 1 );
 440 }
 441
 442 sub expand_graph_paths {
 443     my $self = shift;
 444     # Our collation graph has only one path between any two nodes.
 445     # This is great for display, but not so great for analysis.
 446     # Expand this so that each witness has its own path between any
 447     # two reading nodes.
 448     return unless $self->collapsed;
 449
 450     print STDERR "Expanding witness paths in graph...\n";
 451     foreach my $path( $self->paths ) {
 452         my $from = $path->from;
 453         my $to = $path->to;
 454         my @wits = split( /, /, $path->label );
 455         if( $path->has_hidden_witnesses ) {
 456             push( @wits, @{$path->hidden_witnesses} );
 457         }
 458         $self->del_path( $path );
 459         foreach ( @wits ) {
 460             $self->add_path( $from, $to, $_ );
 461         }
 462     }
 463     $self->collapsed( 0 );
 464 }
 465
 466 =back
 467
 468 =head2 Navigation methods
 469
 470 =over
 471
 472 =item B<start>
 473
 474 my $beginning = $collation->start();
 475
 476 Returns the beginning of the collation, a meta-reading with label '#START#'.
 477
 478 =cut
 479
 480 sub start {
 481     # Return the beginning reading of the graph.
 482     my $self = shift;
 483     my( $new_start ) = @_;
 484     if( $new_start ) {
 485         $self->del_reading( '#START#' );
 486         $self->graph->rename_node( $new_start, '#START#' );
 487     }
 488     return $self->reading('#START#');
 489 }
 490
 491 =item B<reading_sequence>
 492
 493 my @readings = $graph->reading_sequence( $first, $last, $path[, $alt_path] );
 494
 495 Returns the ordered list of readings, starting with $first and ending
 496 with $last, along the given witness path.  If no path is specified,
 497 assume that the path is that of the base text (if any.)
 498
 499 =cut
 500
 501 sub reading_sequence {
 502     my( $self, $start, $end, $witness, $backup ) = @_;
 503
 504     $witness = $self->baselabel unless $witness;
 505     my @readings = ( $start );
 506     my %seen;
 507     my $n = $start;
 508     while( $n && $n ne $end ) {
 509         if( exists( $seen{$n->name()} ) ) {
 510             warn "Detected loop at " . $n->name();
 511             last;
 512         }
 513         $seen{$n->name()} = 1;
 514
 515         my $next = $self->next_reading( $n, $witness, $backup );
 516         warn "Did not find any path for $witness from reading " . $n->name
 517             unless $next;
 518         push( @readings, $next );
 519         $n = $next;
 520     }
 521     # Check that the last reading is our end reading.
 522     my $last = $readings[$#readings];
 523     warn "Last reading found from " . $start->label() .
 524         " for witness $witness is not the end!"
 525         unless $last eq $end;
 526
 527     return @readings;
 528 }
 529
 530 =item B<next_reading>
 531
 532 my $next_reading = $graph->next_reading( $reading, $witpath );
 533
 534 Returns the reading that follows the given reading along the given witness
 535 path.
 536
 537 =cut
 538
 539 sub next_reading {
 540     # Return the successor via the corresponding path.
 541     my $self = shift;
 542     return $self->_find_linked_reading( 'next', @_ );
 543 }
 544
 545 =item B<prior_reading>
 546
 547 my $prior_reading = $graph->prior_reading( $reading, $witpath );
 548
 549 Returns the reading that precedes the given reading along the given witness
 550 path.
 551
 552 =cut
 553
 554 sub prior_reading {
 555     # Return the predecessor via the corresponding path.
 556     my $self = shift;
 557     return $self->_find_linked_reading( 'prior', @_ );
 558 }
 559
 560 sub _find_linked_reading {
 561     my( $self, $direction, $node, $path, $alt_path ) = @_;
 562     my @linked_paths = $direction eq 'next'
 563         ? $node->outgoing() : $node->incoming();
 564     return undef unless scalar( @linked_paths );
 565
 566     # We have to find the linked path that contains all of the
 567     # witnesses supplied in $path.
 568     my( @path_wits, @alt_path_wits );
 569     @path_wits = $self->witnesses_of_label( $path ) if $path;
 570     @alt_path_wits = $self->witnesses_of_label( $alt_path ) if $alt_path;
 571     my $base_le;
 572     my $alt_le;
 573     foreach my $le ( @linked_paths ) {
 574         if( $le->name eq $self->baselabel ) {
 575             $base_le = $le;
 576         } else {
 577             my @le_wits = $self->witnesses_of_label( $le->name );
 578             if( _is_within( \@path_wits, \@le_wits ) ) {
 579                 # This is the right path.
 580                 return $direction eq 'next' ? $le->to() : $le->from();
 581             } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
 582                 $alt_le = $le;
 583             }
 584         }
 585     }
 586     # Got this far? Return the alternate path if it exists.
 587     return $direction eq 'next' ? $alt_le->to() : $alt_le->from()
 588         if $alt_le;
 589
 590     # Got this far? Return the base path if it exists.
 591     return $direction eq 'next' ? $base_le->to() : $base_le->from()
 592         if $base_le;
 593
 594     # Got this far? We have no appropriate path.
 595     warn "Could not find $direction node from " . $node->label
 596         . " along path $path";
 597     return undef;
 598 }
 599
 600 # Some set logic.
 601 sub _is_within {
 602     my( $set1, $set2 ) = @_;
 603     my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
 604     foreach my $el ( @$set1 ) {
 605         $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
 606     }
 607     return $ret;
 608 }
 609
 610
 611 ## INITIALIZATION METHODS - for use by parsers
 612 # Walk the paths for each witness in the graph, and return the nodes
 613 # that the graph has in common.  If $using_base is true, some
 614 # different logic is needed.
 615
 616 sub walk_witness_paths {
 617     my( $self, $end ) = @_;
 618     # For each witness, walk the path through the graph.
 619     # Then we need to find the common nodes.
 620     # TODO This method is going to fall down if we have a very gappy
 621     # text in the collation.
 622     my $paths = {};
 623     my @common_readings;
 624     foreach my $wit ( @{$self->tradition->witnesses} ) {
 625         my $curr_reading = $self->start;
 626         my @wit_path = $self->reading_sequence( $self->start, $end,
 627                                                 $wit->sigil );
 628         $wit->path( \@wit_path );
 629
 630         # Detect the common readings.
 631         @common_readings = _find_common( \@common_readings, \@wit_path );
 632     }
 633
 634     # Mark all the nodes as either common or not.
 635     foreach my $cn ( @common_readings ) {
 636         print STDERR "Setting " . $cn->name . " / " . $cn->label
 637             . " as common node\n";
 638         $cn->make_common;
 639     }
 640     foreach my $n ( $self->readings() ) {
 641         $n->make_variant unless $n->is_common;
 642     }
 643     # Return an array of the common nodes in order.
 644     return @common_readings;
 645 }
 646
 647 sub _find_common {
 648     my( $common_readings, $new_path ) = @_;
 649     my @cr;
 650     if( @$common_readings ) {
 651         foreach my $n ( @$new_path ) {
 652             push( @cr, $n ) if grep { $_ eq $n } @$common_readings;
 653         }
 654     } else {
 655         push( @cr, @$new_path );
 656     }
 657     return @cr;
 658 }
 659
 660 sub _remove_common {
 661     my( $common_readings, $divergence ) = @_;
 662     my @cr;
 663     my %diverged;
 664     map { $diverged{$_->name} = 1 } @$divergence;
 665     foreach( @$common_readings ) {
 666         push( @cr, $_ ) unless $diverged{$_->name};
 667     }
 668     return @cr;
 669 }
 670
 671
 672 # An alternative to walk_witness_paths, for use when a collation is
 673 # constructed from a base text and an apparatus.  We have the
 674 # sequences of readings and just need to add path edges.
 675
 676 sub make_witness_paths {
 677     my( $self ) = @_;
 678
 679     my @common_readings;
 680     foreach my $wit ( @{$self->tradition->witnesses} ) {
 681         print STDERR "Making path for " . $wit->sigil . "\n";
 682         $self->make_witness_path( $wit );
 683         @common_readings = _find_common( \@common_readings, $wit->path );
 684         @common_readings = _find_common( \@common_readings, $wit->uncorrected_path );
 685     }
 686     return @common_readings;
 687 }
 688
 689 sub make_witness_path {
 690     my( $self, $wit ) = @_;
 691     my @chain = @{$wit->path};
 692     my $sig = $wit->sigil;
 693     foreach my $idx ( 0 .. $#chain-1 ) {
 694         $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
 695     }
 696     @chain = @{$wit->uncorrected_path};
 697     foreach my $idx( 0 .. $#chain-1 ) {
 698         my $source = $chain[$idx];
 699         my $target = $chain[$idx+1];
 700         $self->add_path( $source, $target, "$sig (a.c.)" )
 701             unless $self->has_path( $source, $target, $sig );
 702     }
 703 }
 704
 705 sub common_readings {
 706     my $self = shift;
 707     my @common = grep { $_->is_common } $self->readings();
 708     return sort { _cmp_position( $a->position, $b->position ) } @common;
 709 }
 710
 711 # Calculate the relative positions of nodes in the graph, if they
 712 # were not given to us.
 713 sub calculate_positions {
 714     my( $self, @ordered_common ) = @_;
 715
 716     # We have to calculate the position identifiers for each word,
 717     # keyed on the common nodes.  This will be 'fun'.  The end result
 718     # is a hash per witness, whose key is the word node and whose
 719     # value is its position in the text.  Common nodes are always N,1
 720     # so have identical positions in each text.
 721
 722     my $node_pos = {};
 723     foreach my $wit ( @{$self->tradition->witnesses} ) {
 724         print STDERR "Calculating positions in " . $wit->sigil . "\n";
 725         _update_positions_from_path( $wit->path, @ordered_common );
 726         _update_positions_from_path( $wit->uncorrected_path, @ordered_common )
 727             if $wit->has_ante_corr;
 728     }
 729
 730     # DEBUG
 731     foreach my $r ( $self->readings() ) {
 732         print STDERR "Reading " . $r->name . "/" . $r->label . " has no position\n"
 733             unless( $r->has_position );
 734     }
 735
 736     $self->init_lemmata();
 737 }
 738
 739 sub _update_positions_from_path {
 740     my( $path, @ordered_common ) = @_;
 741
 742     # First we walk the given path, making a matrix for the witness
 743     # that corresponds to its eventual position identifier.  Common
 744     # nodes always start a new row, and are thus always in the first
 745     # column.
 746     my $wit_matrix = [];
 747     my $cn = 0;  # We should hit the common readings in order.
 748     my $row = [];
 749     foreach my $wn ( @{$path} ) {
 750         if( $wn eq $ordered_common[$cn] ) {
 751             # Set up to look for the next common node, and
 752             # start a new row of words.
 753             $cn++;
 754             push( @$wit_matrix, $row ) if scalar( @$row );
 755             $row = [];
 756         }
 757         push( @$row, $wn );
 758     }
 759     push( @$wit_matrix, $row );  # Push the last row onto the matrix
 760
 761     # Now we have a matrix per witness, so that each row in the
 762     # matrix begins with a common node, and continues with all the
 763     # variant words that appear in the witness.  We turn this into
 764     # real positions in row,cell format.  But we need some
 765     # trickery in order to make sure that each node gets assigned
 766     # to only one position.
 767
 768     foreach my $li ( 1..scalar(@$wit_matrix) ) {
 769         foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) {
 770             my $reading = $wit_matrix->[$li-1]->[$di-1];
 771             my $position = "$li,$di";
 772
 773             # If we have seen this node before, we need to compare
 774             # its position with what went before.
 775             unless( $reading->has_position &&
 776                     _cmp_position( $position, $reading->position ) < 1 ) {
 777                 # The new position ID replaces the old one.
 778                 $reading->position( $position );
 779             } # otherwise, the old position needs to stay.
 780         }
 781     }
 782 }
 783
 784 sub _cmp_position {
 785     my( $a, $b ) = @_;
 786     if ( $a && $b ) {
 787         my @pos_a = split(/,/, $a );
 788         my @pos_b = split(/,/, $b );
 789
 790         my $big_cmp = $pos_a[0] <=> $pos_b[0];
 791         return $big_cmp if $big_cmp;
 792         # else
 793         return $pos_a[1] <=> $pos_b[1];
 794     } elsif ( $b ) { # a is undefined
 795         return -1;
 796     } elsif ( $a ) { # b is undefined
 797         return 1;
 798     }
 799     return 0; # they are both undefined
 800 }
 801
 802 sub all_positions {
 803     my $self = shift;
 804     my %positions = ();
 805     map { $positions{$_->position} = 1 } $self->readings;
 806     my @answer = sort { _cmp_position( $a, $b ) } keys( %positions );
 807     return @answer;
 808 }
 809
 810 sub readings_at_position {
 811     my( $self, $pos ) = @_;
 812     my @answer = grep { $_->position eq $pos } $self->readings;
 813     return @answer;
 814 }
 815
 816 ## Lemmatizer functions
 817
 818 sub init_lemmata {
 819     my $self = shift;
 820
 821     foreach my $position ( $self->all_positions ) {
 822         $self->lemmata->{$position} = undef;
 823     }
 824
 825     foreach my $cr ( $self->common_readings ) {
 826         $self->lemmata->{$cr->position} = $cr->name;
 827     }
 828 }
 829
 830 =item B<lemma_readings>
 831
 832 my @state = $graph->lemma_readings( @readings_delemmatized );
 833
 834 Takes a list of readings that have just been delemmatized, and returns
 835 a set of tuples of the form ['reading', 'state'] that indicates what
 836 changes need to be made to the graph.
 837
 838 =over
 839
 840 =item *
 841
 842 A state of 1 means 'lemmatize this reading'
 843
 844 =item *
 845
 846 A state of 0 means 'delemmatize this reading'
 847
 848 =item *
 849
 850 A state of undef means 'an ellipsis belongs in the text here because
 851 no decision has been made / an earlier decision was backed out'
 852
 853 =back
 854
 855 =cut
 856
 857 sub lemma_readings {
 858     my( $self, @toggled_off_nodes ) = @_;
 859
 860     # First get the positions of those nodes which have been
 861     # toggled off.
 862     my $positions_off = {};
 863     map { $positions_off->{ $_->position } = $_->name } @toggled_off_nodes;
 864
 865     # Now for each position, we have to see if a node is on, and we
 866     # have to see if a node has been turned off.
 867     my @answer;
 868     foreach my $pos ( $self->all_positions() ) {
 869         # Find the state of this position.  If there is an active node,
 870         # its name will be the state; otherwise the state will be 0
 871         # (nothing at this position) or undef (ellipsis at this position)
 872         my $active = $self->lemmata->{$pos};
 873
 874         # Is there a formerly active node that was toggled off?
 875         if( exists( $positions_off->{$pos} ) ) {
 876             my $off_node = $positions_off->{$pos};
 877             if( $active && $active ne $off_node) {
 878                 push( @answer, [ $off_node, 0 ], [ $active, 1 ] );
 879             } else {
 880                 push( @answer, [ $off_node, $active ] );
 881             }
 882
 883         # No formerly active node, so we just see if there is a currently
 884         # active one.
 885         } elsif( $active ) {
 886             # Push the active node, whatever it is.
 887             push( @answer, [ $active, 1 ] );
 888         } else {
 889             # Push the state that is there. Arbitrarily use the first node
 890             # at that position.
 891             my @pos_nodes = $self->readings_at_position( $pos );
 892             push( @answer, [ $pos_nodes[0]->name, $self->lemmata->{$pos} ] );
 893         }
 894     }
 895
 896     return @answer;
 897 }
 898
 899 =item B<toggle_reading>
 900
 901 my @readings_delemmatized = $graph->toggle_reading( $reading_name );
 902
 903 Takes a reading node name, and either lemmatizes or de-lemmatizes
 904 it. Returns a list of all readings that are de-lemmatized as a result
 905 of the toggle.
 906
 907 =cut
 908
 909 sub toggle_reading {
 910     my( $self, $rname ) = @_;
 911
 912     return unless $rname;
 913     my $reading = $self->reading( $rname );
 914     if( !$reading || $reading->is_common() ) {
 915         # Do nothing, it's a common node.
 916         return;
 917     }
 918
 919     my $pos = $reading->position;
 920     my $old_state = $self->lemmata->{$pos};
 921     my @readings_off;
 922     if( $old_state && $old_state eq $rname ) {
 923         # Turn off the node. We turn on no others by default.
 924         push( @readings_off, $reading );
 925     } else {
 926         # Turn on the node.
 927         $self->lemmata->{$pos} = $rname;
 928         # Any other 'on' readings in the same position should be off.
 929         push( @readings_off, $self->same_position_as( $reading ) );
 930         # Any node that is an identical transposed one should be off.
 931         push( @readings_off, $reading->identical_readings );
 932     }
 933     @readings_off = unique_list( @readings_off );
 934
 935     # Turn off the readings that need to be turned off.
 936     my @readings_delemmatized;
 937     foreach my $n ( @readings_off ) {
 938         my $state = $self->lemmata->{$n->position};
 939         if( $state && $state eq $n->name ) {
 940             # this reading is still on, so turn it off
 941             push( @readings_delemmatized, $n );
 942             my $new_state = undef;
 943             if( $n eq $reading ) {
 944                 # This is the reading that was clicked, so if there are no
 945                 # other readings there, turn off the position.  In all other
 946                 # cases, restore the ellipsis.
 947                 my @other_n = $self->same_position_as( $n );
 948                 $new_state = 0 unless @other_n;
 949             }
 950             $self->lemmata->{$n->position} = $new_state;
 951         } elsif( $old_state && $old_state eq $n->name ) {
 952             # another reading has already been turned on here
 953             push( @readings_delemmatized, $n );
 954         } # else some other reading was on anyway, so pass.
 955     }
 956     return @readings_delemmatized;
 957 }
 958
 959 sub same_position_as {
 960     my( $self, $reading ) = @_;
 961     my $pos = $reading->position;
 962     my @same = grep { $_ ne $reading } $self->readings_at_position( $reading->position );
 963     return @same;
 964 }
 965
 966 # Return the string that joins together a list of witnesses for
 967 # display on a single path.
 968 sub path_label {
 969     my $self = shift;
 970     return join( $self->wit_list_separator, @_ );
 971 }
 972
 973 sub witnesses_of_label {
 974     my( $self, $label ) = @_;
 975     my $regex = $self->wit_list_separator;
 976     my @answer = split( /\Q$regex\E/, $label );
 977     return @answer;
 978 }
 979
 980 sub unique_list {
 981     my( @list ) = @_;
 982     my %h;
 983     map { $h{$_->name} = $_ } @list;
 984     return values( %h );
 985 }
 986
 987 sub add_hash_entry {
 988     my( $hash, $key, $entry ) = @_;
 989     if( exists $hash->{$key} ) {
 990         push( @{$hash->{$key}}, $entry );
 991     } else {
 992         $hash->{$key} = [ $entry ];
 993     }
 994 }
 995
 996 no Moose;
 997 __PACKAGE__->meta->make_immutable;