lib/Text/Tradition/Analysis.pm

   1 package Text::Tradition::Analysis;
   2
   3 use strict;
   4 use warnings;
   5 use Benchmark;
   6 use Encode qw/ encode_utf8 /;
   7 use Exporter 'import';
   8 use Graph;
   9 use JSON qw/ encode_json decode_json /;
  10 use LWP::UserAgent;
  11 use Text::Tradition;
  12 use Text::Tradition::Stemma;
  13 use TryCatch;
  14
  15 use vars qw/ @EXPORT_OK /;
  16 @EXPORT_OK = qw/ run_analysis group_variants analyze_variant_location wit_stringify /;
  17
  18 =head1 NAME
  19
  20 Text::Tradition::Analysis - functions for stemma analysis of a tradition
  21
  22 =head1 SYNOPSIS
  23
  24   use Text::Tradition;
  25   use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
  26   my $t = Text::Tradition->new(
  27     'name' => 'this is a text',
  28     'input' => 'TEI',
  29     'file' => '/path/to/tei_parallel_seg_file.xml' );
  30   $t->add_stemma( 'dotfile' => $stemmafile );
  31
  32   my $variant_data = run_analysis( $tradition );
  33   # Recalculate rank $n treating all orthographic variants as equivalent
  34   my $reanalyze = analyze_variant_location( $tradition, $n, 0, 'orthographic' );
  35
  36 =head1 DESCRIPTION
  37
  38 Text::Tradition is a library for representation and analysis of collated
  39 texts, particularly medieval ones.  The Collation is the central feature of
  40 a Tradition, where the text, its sequence of readings, and its relationships
  41 between readings are actually kept.
  42
  43 =head1 SUBROUTINES
  44
  45 =head2 run_analysis( $tradition, %opts )
  46
  47 Runs the analysis described in analyze_variant_location on every location in the
  48 collation of the given tradition, with the given options. These include:
  49
  50 =over 4
  51
  52 =item * stemma_id - Specify which of the tradition's stemmata to use. Default
  53 is 0 (i.e. the first).
  54
  55 =item * ranks - Specify a list of location ranks to analyze; exclude the rest.
  56
  57 =item * merge_types - Specify a list of relationship types, where related readings
  58 should be treated as identical for the purposes of analysis.
  59
  60 =item * exclude_type1 - Exclude those ranks whose groupings have only type-1 variants.
  61
  62 =back
  63
  64 =begin testing
  65
  66 use Text::Tradition;
  67 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
  68
  69 my $datafile = 't/data/florilegium_tei_ps.xml';
  70 my $tradition = Text::Tradition->new( 'input' => 'TEI',
  71                                       'name' => 'test0',
  72                                       'file' => $datafile );
  73 my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
  74 is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
  75
  76 my %expected_genealogical = (
  77         1 => 0,
  78         2 => 1,
  79         3 =>  0,
  80         5 =>  0,
  81         7 =>  0,
  82         8 =>  0,
  83         10 => 0,
  84         13 => 1,
  85         33 => 0,
  86         34 => 0,
  87         37 => 0,
  88         60 => 0,
  89         81 => 1,
  90         84 => 0,
  91         87 => 0,
  92         101 => 0,
  93         102 => 0,
  94         122 => 1,
  95         157 => 0,
  96         166 => 1,
  97         169 => 1,
  98         200 => 0,
  99         216 => 1,
 100         217 => 1,
 101         219 => 1,
 102         241 => 1,
 103         242 => 1,
 104         243 => 1,
 105 );
 106
 107 my $data = run_analysis( $tradition );
 108 my $c = $tradition->collation;
 109 foreach my $row ( @{$data->{'variants'}} ) {
 110         # Account for rows that used to be "not useful"
 111         unless( exists $expected_genealogical{$row->{'id'}} ) {
 112                 $expected_genealogical{$row->{'id'}} = 1;
 113         }
 114         my $gen_bool = $row->{'genealogical'} ? 1 : 0;
 115         is( $gen_bool, $expected_genealogical{$row->{'id'}},
 116                 "Got correct genealogical flag for row " . $row->{'id'} );
 117         # Check that we have the right row with the right groups
 118         my $rank = $row->{'id'};
 119         foreach my $rdghash ( @{$row->{'readings'}} ) {
 120                 # Skip 'readings' that aren't really
 121                 next unless $c->reading( $rdghash->{'readingid'} );
 122                 # Check the rank
 123                 is( $c->reading( $rdghash->{'readingid'} )->rank, $rank,
 124                         "Got correct reading rank" );
 125                 # Check the witnesses
 126                 my @realwits = sort $c->reading_witnesses( $rdghash->{'readingid'} );
 127                 my @sgrp = sort @{$rdghash->{'group'}};
 128                 is_deeply( \@sgrp, \@realwits, "Reading analyzed with correct groups" );
 129         }
 130 }
 131 is( $data->{'variant_count'}, 58, "Got right total variant number" );
 132 # TODO Make something meaningful of conflict count, maybe test other bits
 133
 134 =end testing
 135
 136 =cut
 137
 138 sub run_analysis {
 139         my( $tradition, %opts ) = @_;
 140         my $c = $tradition->collation;
 141
 142         my $stemma_id = $opts{'stemma_id'} || 0;
 143         my @ranks = ref( $opts{'ranks'} ) eq 'ARRAY' ? @{$opts{'ranks'}} : ();
 144         my @collapse = ref( $opts{'merge_types'} ) eq 'ARRAY' ? @{$opts{'merge_types'}} : ();
 145
 146         # Get the stemma
 147         my $stemma = $tradition->stemma( $stemma_id );
 148
 149         # Figure out which witnesses we are working with - that is, the ones that
 150         # appear both in the stemma and in the tradition. All others are 'lacunose'
 151         # for our purposes.
 152         my @lacunose = $stemma->hypotheticals;
 153         my @tradition_wits = map { $_->sigil } $tradition->witnesses;
 154         push( @lacunose, _symmdiff( [ $stemma->witnesses ], \@tradition_wits ) );
 155
 156         # Find and mark 'common' ranks for exclusion, unless they were
 157         # explicitly specified.
 158         unless( @ranks ) {
 159                 my %common_rank;
 160                 foreach my $rdg ( $c->common_readings ) {
 161                         $common_rank{$rdg->rank} = 1;
 162                 }
 163                 @ranks = grep { !$common_rank{$_} } ( 1 .. $c->end->rank-1 );
 164         }
 165
 166         # Group the variants to send to the solver
 167         my @groups;
 168         my @use_ranks;
 169         my %lacunae;
 170         my $moved = {};
 171         foreach my $rank ( @ranks ) {
 172                 my $missing = [ @lacunose ];
 173                 my $rankgroup = group_variants( $tradition, $rank, $missing, $moved, \@collapse );
 174                 # Filter out any empty rankgroups
 175                 # (e.g. from the later rank for a transposition)
 176                 next unless keys %$rankgroup;
 177                 if( $opts{'exclude_type1'} ) {
 178                         # Check to see whether this is a "useful" group.
 179                         my( $rdgs, $grps ) = _useful_variant( $rankgroup,
 180                                 $stemma->graph, $c->ac_label );
 181                         next unless @$rdgs;
 182                 }
 183                 push( @use_ranks, $rank );
 184                 push( @groups, $rankgroup );
 185                 $lacunae{$rank} = $missing;
 186         }
 187         # Run the solver
 188         my $answer = solve_variants( $stemma, @groups );
 189
 190         # Do further analysis on the answer
 191         my $conflict_count = 0;
 192         my $aclabel = $c->ac_label;
 193         foreach my $idx ( 0 .. $#use_ranks ) {
 194                 my $location = $answer->{'variants'}->[$idx];
 195                 # Add the rank back in
 196                 my $rank = $use_ranks[$idx];
 197                 $location->{'id'} = $rank;
 198                 # Note what our lacunae are
 199                 my %lmiss;
 200                 map { $lmiss{$_} = 1 } @{$lacunae{$use_ranks[$idx]}};
 201                 $location->{'missing'} = [ keys %lmiss ];
 202
 203                 # Run the extra analysis we need.
 204                 analyze_location( $tradition, $stemma, $location, \%lmiss );
 205
 206                 my @layerwits;
 207                 # Do the final post-analysis tidying up of the data.
 208                 foreach my $rdghash ( @{$location->{'readings'}} ) {
 209                         $conflict_count++
 210                                 if exists $rdghash->{'conflict'} && $rdghash->{'conflict'};
 211                         # Add the reading text back in, setting display value as needed
 212                         my $rdg = $c->reading( $rdghash->{'readingid'} );
 213                         if( $rdg ) {
 214                                 $rdghash->{'text'} = $rdg->text .
 215                                         ( $rdg->rank == $rank ? '' : ' [' . $rdg->rank . ']' );
 216                                 $rdghash->{'is_ungrammatical'} = $rdg->grammar_invalid;
 217                                 $rdghash->{'is_nonsense'} = $rdg->is_nonsense;
 218                         }
 219                         # Remove lacunose witnesses from this reading's list now that the
 220                         # analysis is done
 221                         my @realgroup;
 222                         map { push( @realgroup, $_ ) unless $lmiss{$_} } @{$rdghash->{'group'}};
 223                         $rdghash->{'group'} = \@realgroup;
 224                         # Note any layered witnesses that appear in this group
 225                         foreach( @realgroup ) {
 226                                 if( $_ =~ /^(.*)\Q$aclabel\E$/ ) {
 227                                         push( @layerwits, $1 );
 228                                 }
 229                         }
 230                 }
 231                 $location->{'layerwits'} = \@layerwits if @layerwits;
 232         }
 233         $answer->{'conflict_count'} = $conflict_count;
 234
 235         return $answer;
 236 }
 237
 238 =head2 group_variants( $tradition, $rank, $lacunose, @merge_relationship_types )
 239
 240 Groups the variants at the given $rank of the collation, treating any
 241 relationships in @merge_relationship_types as equivalent.  $lacunose should
 242 be a reference to an array, to which the sigla of lacunose witnesses at this
 243 rank will be appended; $transposed should be a reference to a hash, wherein
 244 the identities of transposed readings and their relatives will be stored.
 245
 246 Returns a hash $group_readings where $rdg is attested by the witnesses listed
 247 in $group_readings->{$rdg}.
 248
 249 =cut
 250
 251 # Return group_readings, groups, lacunose
 252 sub group_variants {
 253         my( $tradition, $rank, $lacunose, $transposed, $collapse ) = @_;
 254         my $c = $tradition->collation;
 255         my $aclabel = $c->ac_label;
 256         my $table = $c->alignment_table;
 257         # Get the alignment table readings
 258         my %readings_at_rank;
 259         my %is_lacunose; # lookup table for witnesses not in stemma
 260         map { $is_lacunose{$_} = 1; $is_lacunose{$_.$aclabel} = 1 } @$lacunose;
 261         my @check_for_gaps;
 262         my %moved_wits;
 263         my $has_transposition;
 264         foreach my $tablewit ( @{$table->{'alignment'}} ) {
 265                 my $rdg = $tablewit->{'tokens'}->[$rank-1];
 266                 my $wit = $tablewit->{'witness'};
 267                 # Exclude the witness if it is "lacunose" which if we got here
 268                 # means "not in the stemma".
 269                 next if $is_lacunose{$wit};
 270                 # Note if the witness is actually in a lacuna
 271                 if( $rdg && $rdg->{'t'}->is_lacuna ) {
 272                         _add_to_witlist( $wit, $lacunose, $aclabel );
 273                 # Otherwise the witness either has a positive reading...
 274                 } elsif( $rdg ) {
 275                         # If the reading has been counted elsewhere as a transposition, ignore it.
 276                         if( $transposed->{$rdg->{'t'}->id} ) {
 277                                 # TODO Does this cope with three-way transpositions?
 278                                 map { $moved_wits{$_} = 1 } @{$transposed->{$rdg->{'t'}->id}};
 279                                 next;
 280                         }
 281                         # Otherwise, record it...
 282                         $readings_at_rank{$rdg->{'t'}->id} = $rdg->{'t'};
 283                         # ...and grab any transpositions, and their relations.
 284                         my @transp = grep { $_->rank != $rank } $rdg->{'t'}->related_readings();
 285                         foreach my $trdg ( @transp ) {
 286                                 next if exists $readings_at_rank{$trdg->id};
 287                                 $has_transposition = 1;
 288                                 my @affected_wits = _table_witnesses(
 289                                         $table, $trdg, \%is_lacunose, $aclabel );
 290                                 next unless @affected_wits;
 291                                 map { $moved_wits{$_} = 1 } @affected_wits;
 292                                 $transposed->{$trdg->id} =
 293                                         [ _table_witnesses( $table, $rdg->{'t'}, \%is_lacunose, $aclabel ) ];
 294                                 $readings_at_rank{$trdg->id} = $trdg;
 295                         }
 296                 # ...or it is empty, ergo a gap.
 297                 } else {
 298                         _add_to_witlist( $wit, \@check_for_gaps, $aclabel );
 299                 }
 300         }
 301         my @gap_wits;
 302         map { _add_to_witlist( $_, \@gap_wits, $aclabel )
 303                 unless $moved_wits{$_} } @check_for_gaps;
 304         # Group the readings, collapsing groups by relationship if needed
 305         my $grouped_readings = {};
 306         foreach my $rdg ( values %readings_at_rank ) {
 307                 # Skip readings that have been collapsed into others.
 308                 next if exists $grouped_readings->{$rdg->id}
 309                         && $grouped_readings->{$rdg->id} eq 'COLLAPSE';
 310                 # Get the witness list, including from readings collapsed into this one.
 311                 my @wits = _table_witnesses( $table, $rdg, \%is_lacunose, $aclabel );
 312                 if( $collapse && @$collapse ) {
 313                         my $filter = sub { my $r = $_[0]; grep { $_ eq $r->type } @$collapse; };
 314                         foreach my $other ( $rdg->related_readings( $filter ) ) {
 315                                 my @otherwits = _table_witnesses(
 316                                         $table, $other, \%is_lacunose, $aclabel );
 317                                 push( @wits, @otherwits );
 318                                 $grouped_readings->{$other->id} = 'COLLAPSE';
 319                         }
 320                 }
 321                 $grouped_readings->{$rdg->id} = \@wits;
 322         }
 323         $grouped_readings->{'(omitted)'} = \@gap_wits if @gap_wits;
 324         # Get rid of our collapsed readings
 325         map { delete $grouped_readings->{$_} if $grouped_readings->{$_} eq 'COLLAPSE' }
 326                 keys %$grouped_readings
 327                 if $collapse;
 328
 329         # If something was transposed, check the groups for doubled-up readings
 330         if( $has_transposition ) {
 331                 # print STDERR "Group for rank $rank:\n";
 332                 # map { print STDERR "\t$_: " . join( ' ' , @{$grouped_readings->{$_}} ) . "\n" }
 333                 #       keys %$grouped_readings;
 334                 _check_transposed_consistency( $c, $rank, $transposed, $grouped_readings );
 335         }
 336
 337         # Return the result
 338         return $grouped_readings;
 339 }
 340
 341 # Helper function to query the alignment table for all witnesses (a.c. included)
 342 # that have a given reading at its rank.
 343 sub _table_witnesses {
 344         my( $table, $trdg, $lacunose, $aclabel ) = @_;
 345         my $tableidx = $trdg->rank - 1;
 346         my @has_reading;
 347         foreach my $row ( @{$table->{'alignment'}} ) {
 348                 my $wit = $row->{'witness'};
 349                 next if $lacunose->{$wit};
 350                 my $rdg = $row->{'tokens'}->[$tableidx];
 351                 next unless exists $rdg->{'t'} && defined $rdg->{'t'};
 352                 _add_to_witlist( $wit, \@has_reading, $aclabel )
 353                         if $rdg->{'t'}->id eq $trdg->id;
 354         }
 355         return @has_reading;
 356 }
 357
 358 # Helper function to ensure that X and X a.c. never appear in the same list.
 359 sub _add_to_witlist {
 360         my( $wit, $list, $acstr ) = @_;
 361         my %inlist;
 362         my $idx = 0;
 363         map { $inlist{$_} = $idx++ } @$list;
 364         if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
 365                 my $acwit = $1;
 366                 unless( exists $inlist{$acwit} ) {
 367                         push( @$list, $acwit.$acstr );
 368                 }
 369         } else {
 370                 if( exists( $inlist{$wit.$acstr} ) ) {
 371                         # Replace the a.c. version with the main witness
 372                         my $i = $inlist{$wit.$acstr};
 373                         $list->[$i] = $wit;
 374                 } else {
 375                         push( @$list, $wit );
 376                 }
 377         }
 378 }
 379
 380 sub _check_transposed_consistency {
 381         my( $c, $rank, $transposed, $groupings ) = @_;
 382         my %seen_wits;
 383         my %thisrank;
 384         # Note which readings are actually at this rank, and which witnesses
 385         # belong to which reading.
 386         foreach my $rdg ( keys %$groupings ) {
 387                 my $rdgobj = $c->reading( $rdg );
 388                 # Count '(omitted)' as a reading at this rank
 389                 $thisrank{$rdg} = 1 if !$rdgobj || $rdgobj->rank == $rank;
 390                 map { push( @{$seen_wits{$_}}, $rdg ) } @{$groupings->{$rdg}};
 391         }
 392         # Our work is done if we have no witness belonging to more than one
 393         # reading.
 394         my @doubled = grep { scalar @{$seen_wits{$_}} > 1 } keys %seen_wits;
 395         return unless @doubled;
 396         # If we have a symmetric related transposition, drop the non-rank readings.
 397         if( @doubled == scalar keys %seen_wits ) {
 398                 foreach my $rdg ( keys %$groupings ) {
 399                         if( !$thisrank{$rdg} ) {
 400                                 my $groupstr = wit_stringify( $groupings->{$rdg} );
 401                                 my ( $matched ) = grep { $groupstr eq wit_stringify( $groupings->{$_} ) }
 402                                         keys %thisrank;
 403                                 delete $groupings->{$rdg};
 404                                 # If we found a group match, assume there is a symmetry happening.
 405                                 # TODO think more about this
 406                                 # print STDERR "*** Deleting symmetric reading $rdg\n";
 407                                 unless( $matched ) {
 408                                         delete $transposed->{$rdg};
 409                                         warn "Found problem in evident symmetry with reading $rdg";
 410                                 }
 411                         }
 412                 }
 413         # Otherwise 'unhook' the transposed reading(s) that have duplicates.
 414         } else {
 415                 foreach my $dup ( @doubled ) {
 416                         foreach my $rdg ( @{$seen_wits{$dup}} ) {
 417                                 next if $thisrank{$rdg};
 418                                 next unless exists $groupings->{$rdg};
 419                                 # print STDERR "*** Deleting asymmetric doubled-up reading $rdg\n";
 420                                 delete $groupings->{$rdg};
 421                                 delete $transposed->{$rdg};
 422                         }
 423                 }
 424                 # and put any now-orphaned readings into an 'omitted' reading.
 425                 foreach my $wit ( keys %seen_wits ) {
 426                         unless( grep { exists $groupings->{$_} } @{$seen_wits{$wit}} ) {
 427                                 $groupings->{'(omitted)'} = [] unless exists $groupings->{'(omitted)'};
 428                                 _add_to_witlist( $wit, $groupings->{'(omitted)'}, $c->ac_label );
 429                         }
 430                 }
 431         }
 432 }
 433
 434 =head2 solve_variants( $graph, @groups )
 435
 436 Sends the set of groups to the external graph solver service and returns
 437 a cleaned-up answer, adding the rank IDs back where they belong.
 438
 439 The JSON has the form
 440   { "graph": [ stemmagraph DOT string without newlines ],
 441     "groupings": [ array of arrays of groups, one per rank ] }
 442
 443 The answer has the form
 444   { "variants" => [ array of variant location structures ],
 445     "variant_count" => total,
 446     "conflict_count" => number of conflicts detected,
 447     "genealogical_count" => number of solutions found }
 448
 449 =cut
 450
 451 sub solve_variants {
 452         my( $stemma, @groups ) = @_;
 453         my $aclabel = $stemma->collation->ac_label;
 454
 455         # Filter the groups down to distinct groups, and work out what graph
 456         # should be used in the calculation of each group. We want to send each
 457         # distinct problem to the solver only once.
 458         # We need a whole bunch of lookup tables for this.
 459         my $index_groupkeys = {};       # Save the order of readings
 460         my $group_indices = {};         # Save the indices that have a given grouping
 461         my $graph_problems = {};        # Save the groupings for the given graph
 462
 463         foreach my $idx ( 0..$#groups ) {
 464                 my $ghash = $groups[$idx];
 465                 my @grouping;
 466                 # Sort the groupings from big to little, and scan for a.c. witnesses
 467                 # that would need an extended graph.
 468                 my @acwits;   # note which AC witnesses crop up at this rank
 469                 my @idxkeys = sort { scalar @{$ghash->{$b}} <=> scalar @{$ghash->{$a}} }
 470                         keys %$ghash;
 471                 foreach my $rdg ( @idxkeys ) {
 472                         my @sg = sort @{$ghash->{$rdg}};
 473                         push( @acwits, grep { $_ =~ /\Q$aclabel\E$/ } @sg );
 474                         push( @grouping, \@sg );
 475                 }
 476                 # Save the reading order
 477                 $index_groupkeys->{$idx} = \@idxkeys;
 478
 479                 # Now associate the distinct group with this index
 480                 my $gstr = wit_stringify( \@grouping );
 481                 push( @{$group_indices->{$gstr}}, $idx );
 482
 483                 # Finally, add the group to the list to be calculated for this graph.
 484                 map { s/\Q$aclabel\E$// } @acwits;
 485                 my $graph;
 486                 try {
 487                         $graph = $stemma->extend_graph( \@acwits );
 488                 } catch {
 489                         die "Unable to extend graph with @acwits";
 490                 }
 491                 unless( exists $graph_problems->{"$graph"} ) {
 492                         $graph_problems->{"$graph"} = { 'object' => $graph, 'groups' => [] };
 493                 }
 494                 push( @{$graph_problems->{"$graph"}->{'groups'}}, \@grouping );
 495         }
 496
 497         ## For each distinct graph, send its groups to the solver.
 498         my $solver_url = 'http://byzantini.st/cgi-bin/graphcalc.cgi';
 499         my $ua = LWP::UserAgent->new();
 500         ## Witness map is a HACK to get around limitations in node names from IDP
 501         my $witness_map = {};
 502         ## Variables to store answers as they come back
 503         my $variants = [ ( undef ) x ( scalar keys %$index_groupkeys ) ];
 504         my $genealogical = 0;
 505         foreach my $graphkey ( keys %$graph_problems ) {
 506                 my $graph = $graph_problems->{$graphkey}->{'object'};
 507                 my $groupings = $graph_problems->{$graphkey}->{'groups'};
 508                 my $json = encode_json( _safe_wit_strings( $graph, $stemma->collation,
 509                         $groupings, $witness_map ) );
 510                 # Send it off and get the result
 511                 #print STDERR "Sending request: $json\n";
 512                 my $resp = $ua->post( $solver_url, 'Content-Type' => 'application/json',
 513                                                           'Content' => $json );
 514                 my $answer;
 515                 my $used_idp;
 516                 if( $resp->is_success ) {
 517                         $answer = _desanitize_names( decode_json( $resp->content ), $witness_map );
 518                         $used_idp = 1;
 519                 } else {
 520                         # Fall back to the old method.
 521                         warn "IDP solver returned " . $resp->status_line . " / " . $resp->content
 522                                 . "; falling back to perl method";
 523                         $answer = perl_solver( $graph, @$groupings );
 524                 }
 525                 ## The answer is the evaluated groupings, plus a boolean for whether
 526                 ## they were genealogical.  Reconstruct our original groups.
 527                 foreach my $gidx ( 0 .. $#{$groupings} ) {
 528                         my( $calc_groups, $result ) = @{$answer->[$gidx]};
 529                         if( $result ) {
 530                                 $genealogical++;
 531                                 # Prune the calculated groups, in case the IDP solver failed to.
 532                                 if( $used_idp ) {
 533                                         my @pruned_groups;
 534                                         foreach my $cg ( @$calc_groups ) {
 535                                                 # This is a little wasteful but the path of least
 536                                                 # resistance. Send both the stemma, which knows what
 537                                                 # its hypotheticals are, and the actual graph used.
 538                                                 my @pg = _prune_group( $cg, $stemma, $graph );
 539                                                 push( @pruned_groups, \@pg );
 540                                         }
 541                                         $calc_groups = \@pruned_groups;
 542                                 }
 543                         }
 544                         # Retrieve the key for the original group that went to the solver
 545                         my $input_group = wit_stringify( $groupings->[$gidx] );
 546                         foreach my $oidx ( @{$group_indices->{$input_group}} ) {
 547                                 my @readings = @{$index_groupkeys->{$oidx}};
 548                                 my $vstruct = {
 549                                         'genealogical' => $result,
 550                                         'readings' => [],
 551                                 };
 552                                 foreach my $ridx ( 0 .. $#readings ) {
 553                                         push( @{$vstruct->{'readings'}},
 554                                                 { 'readingid' => $readings[$ridx],
 555                                                   'group' => $calc_groups->[$ridx] } );
 556                                 }
 557                                 $variants->[$oidx] = $vstruct;
 558                         }
 559                 }
 560         }
 561
 562         return { 'variants' => $variants,
 563                          'variant_count' => scalar @$variants,
 564                          'genealogical_count' => $genealogical };
 565 }
 566
 567 #### HACKERY to cope with IDP's limited idea of what a node name looks like ###
 568
 569 sub _safe_wit_strings {
 570         my( $graph, $c, $groupings, $witness_map ) = @_;
 571         # Parse the graph we were given into a stemma.
 572         my $safegraph = Graph->new();
 573         # Convert the graph to a safe representation and store the conversion.
 574         foreach my $n ( $graph->vertices ) {
 575                 my $sn = _safe_witstr( $n );
 576                 if( exists $witness_map->{$sn} ) {
 577                         warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
 578                                 if $witness_map->{$sn} ne $n;
 579                 } else {
 580                         $witness_map->{$sn} = $n;
 581                 }
 582                 $safegraph->add_vertex( $sn );
 583                 $safegraph->set_vertex_attributes( $sn,
 584                         $graph->get_vertex_attributes( $n ) );
 585         }
 586         foreach my $e ( $graph->edges ) {
 587                 my @safe_e = ( _safe_witstr( $e->[0] ), _safe_witstr( $e->[1] ) );
 588                 $safegraph->add_edge( @safe_e );
 589         }
 590         my $safe_stemma = Text::Tradition::Stemma->new(
 591                 'collation' => $c, 'graph' => $safegraph );
 592
 593         # Now convert the witness groupings to a safe representation.
 594         my $safe_groupings = [];
 595         foreach my $grouping ( @$groupings ) {
 596                 my $safe_grouping = [];
 597                 foreach my $group ( @$grouping ) {
 598                         my $safe_group = [];
 599                         foreach my $n ( @$group ) {
 600                                 my $sn = _safe_witstr( $n );
 601                                 warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
 602                                         if exists $witness_map->{$sn} && $witness_map->{$sn} ne $n;
 603                                 $witness_map->{$sn} = $n;
 604                                 push( @$safe_group, $sn );
 605                         }
 606                         push( @$safe_grouping, $safe_group );
 607                 }
 608                 push( @$safe_groupings, $safe_grouping );
 609         }
 610
 611         # Return it all in the struct we expect.  We have stored the reductions
 612         # in the $witness_map that we were passed.
 613         return { 'graph' => $safe_stemma->editable( { 'linesep' => ' ' } ),
 614                          'groupings' => $safe_groupings };
 615 }
 616
 617 sub _safe_witstr {
 618         my $witstr = shift;
 619         $witstr =~ s/\s+/_/g;
 620         $witstr =~ s/[^\w\d-]//g;
 621         return $witstr;
 622 }
 623
 624 sub _desanitize_names {
 625         my( $jsonstruct, $witness_map ) = @_;
 626         my $result = [];
 627         foreach my $grouping ( @$jsonstruct ) {
 628                 my $real_grouping = [];
 629                 foreach my $element ( @$grouping ) {
 630                         if( ref( $element ) eq 'ARRAY' ) {
 631                                 # it's the groupset.
 632                                 my $real_groupset = [];
 633                                 foreach my $group ( @$element ) {
 634                                         my $real_group = [];
 635                                         foreach my $n ( @$group ) {
 636                                                 my $rn = $witness_map->{$n};
 637                                                 push( @$real_group, $rn );
 638                                         }
 639                                         push( @$real_groupset, $real_group );
 640                                 }
 641                                 push( @$real_grouping, $real_groupset );
 642                         } else {
 643                                 # It is the boolean, not actually a group.
 644                                 push( @$real_grouping, $element );
 645                         }
 646                 }
 647                 push( @$result, $real_grouping );
 648         }
 649         return $result;
 650 }
 651
 652 ### END HACKERY ###
 653
 654 =head2 analyze_location ( $tradition, $graph, $location_hash )
 655
 656 Given the tradition, its stemma graph, and the solution from the graph solver,
 657 work out the rest of the information we want.  For each reading we need missing,
 658 conflict, reading_parents, independent_occurrence, followed, not_followed, and follow_unknown.  Alters the location_hash in place.
 659
 660 =cut
 661
 662 sub analyze_location {
 663         my ( $tradition, $stemma, $variant_row, $lacunose ) = @_;
 664         my $c = $tradition->collation;
 665
 666         # Make a hash of all known node memberships, and make the subgraphs.
 667         my $contig = {};
 668         my $reading_roots = {};
 669         my $subgraph = {};
 670         my $acstr = $c->ac_label;
 671         my @acwits;
 672         $DB::single = 1 if $variant_row->{id} == 87;
 673         # Note which witnesses positively belong to which group
 674     foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
 675         my $rid = $rdghash->{'readingid'};
 676         foreach my $wit ( @{$rdghash->{'group'}} ) {
 677                 $contig->{$wit} = $rid;
 678             if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
 679                 push( @acwits, $1 );
 680             }
 681         }
 682         }
 683
 684         # Get the actual graph we should work with
 685         my $graph;
 686         try {
 687                 $graph = @acwits ? $stemma->extend_graph( \@acwits ) : $stemma->graph;
 688         } catch {
 689                 die "Could not extend graph with a.c. witnesses @acwits";
 690         }
 691
 692         # Now, armed with that knowledge, make a subgraph for each reading
 693         # and note the root(s) of each subgraph.
 694         foreach my $rdghash( @{$variant_row->{'readings'}} ) {
 695         my $rid = $rdghash->{'readingid'};
 696         my %rdgwits;
 697         # Make the subgraph.
 698         my $part = $graph->copy;
 699         my @todelete = grep { exists $contig->{$_} && $contig->{$_} ne $rid }
 700                 keys %$contig;
 701         $part->delete_vertices( @todelete );
 702         _prune_subtree( $part, $lacunose );
 703                 $subgraph->{$rid} = $part;
 704                 # Record the remaining lacunose nodes as part of this group, if
 705                 # we are dealing with a non-genealogical reading.
 706                 unless( $variant_row->{'genealogical'} ) {
 707                         map { $contig->{$_} = $rid } $part->vertices;
 708                 }
 709                 # Get the reading roots.
 710                 map { $reading_roots->{$_} = $rid } $part->predecessorless_vertices;
 711         }
 712
 713         # Now that we have all the node group memberships, calculate followed/
 714     # non-followed/unknown values for each reading.  Also figure out the
 715     # reading's evident parent(s).
 716     foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
 717         my $rid = $rdghash->{'readingid'};
 718         # Get the subgraph
 719         my $part = $subgraph->{$rid};
 720
 721         # Start figuring things out.
 722         my @roots = grep { $reading_roots->{$_} eq $rid } keys %$reading_roots;
 723         $rdghash->{'independent_occurrence'} = \@roots;
 724         $rdghash->{'followed'} = scalar( $part->vertices ) - scalar( @roots );
 725         # Find the parent readings, if any, of this reading.
 726         my $rdgparents = {};
 727         foreach my $wit ( @roots ) {
 728                 # Look in the main stemma to find this witness's extant or known-reading
 729                 # immediate ancestor(s), and look up the reading that each ancestor olds.
 730                         my @check = $graph->predecessors( $wit );
 731                         while( @check ) {
 732                                 my @next;
 733                                 foreach my $wparent( @check ) {
 734                                         my $preading = $contig->{$wparent};
 735                                         if( $preading ) {
 736                                                 $rdgparents->{$preading} = 1;
 737                                         } else {
 738                                                 push( @next, $graph->predecessors( $wparent ) );
 739                                         }
 740                                 }
 741                                 @check = @next;
 742                         }
 743                 }
 744                 foreach my $p ( keys %$rdgparents ) {
 745                         # Resolve the relationship of the parent to the reading, and
 746                         # save it in our hash.
 747                         my $pobj = $c->reading( $p );
 748                         my $relation;
 749                         my $prep = $pobj ? $pobj->id . ' (' . $pobj->text . ')' : $p;
 750                         if( $pobj ) {
 751                                 my $rel = $c->get_relationship( $p, $rdghash->{readingid} );
 752                                 if( $rel ) {
 753                                         $relation = { type => $rel->type };
 754                                         if( $rel->has_annotation ) {
 755                                                 $relation->{'annotation'} = $rel->annotation;
 756                                         }
 757                                 }
 758                         }
 759                         my $phash = { 'label' => $prep, 'relation' => $relation };
 760                         $phash->{'text'} = $pobj->text if $pobj;
 761                         $rdgparents->{$p} = $phash;
 762                 }
 763
 764                 $rdghash->{'reading_parents'} = $rdgparents;
 765
 766                 # Find the number of times this reading was altered, and the number of
 767                 # times we're not sure.
 768                 my( %nofollow, %unknownfollow );
 769                 foreach my $wit ( $part->vertices ) {
 770                         foreach my $wchild ( $graph->successors( $wit ) ) {
 771                                 next if $part->has_vertex( $wchild );
 772                                 if( $reading_roots->{$wchild} && $contig->{$wchild} ) {
 773                                         # It definitely changed here.
 774                                         $nofollow{$wchild} = 1;
 775                                 } elsif( !($contig->{$wchild}) ) {
 776                                         # The child is a hypothetical node not definitely in
 777                                         # any group. Answer is unknown.
 778                                         $unknownfollow{$wchild} = 1;
 779                                 } # else it's a non-root node in a known group, and therefore
 780                                   # is presumed to have its reading from its group, not this link.
 781                         }
 782                 }
 783                 $rdghash->{'not_followed'} = keys %nofollow;
 784                 $rdghash->{'follow_unknown'} = keys %unknownfollow;
 785
 786                 # Now say whether this reading represents a conflict.
 787                 unless( $variant_row->{'genealogical'} ) {
 788                         $rdghash->{'conflict'} = @roots != 1;
 789                 }
 790     }
 791 }
 792
 793
 794 =head2 perl_solver( $tradition, $rank, $stemma_id, @merge_relationship_types )
 795
 796 ** NOTE ** This method should hopefully not be called - it is not guaranteed
 797 to be correct.  Serves as a backup for the real solver.
 798
 799 Runs an analysis of the given tradition, at the location given in $rank,
 800 against the graph of the stemma specified in $stemma_id.  The argument
 801 @merge_relationship_types is an optional list of relationship types for
 802 which readings so related should be treated as equivalent.
 803
 804 Returns a nested array data structure as follows:
 805
 806  [ [ group_list, is_genealogical ], [ group_list, is_genealogical ] ... ]
 807
 808 where the group list is the array of arrays passed in for each element of @groups,
 809 possibly with the addition of hypothetical readings.
 810
 811
 812 =cut
 813
 814 sub perl_solver {
 815         my( $graph, @groups ) = @_;
 816         my @answer;
 817         foreach my $g ( @groups ) {
 818                 push( @answer, _solve_variant_location( $graph, $g ) );
 819         }
 820         return \@answer;
 821 }
 822
 823 sub _solve_variant_location {
 824         my( $graph, $groups ) = @_;
 825         # Now do the work.
 826     my $contig = {};
 827     my $subgraph = {};
 828     my $is_conflicted;
 829     my $conflict = {};
 830
 831     # Mark each ms as in its own group, first.
 832     foreach my $g ( @$groups ) {
 833         my $gst = wit_stringify( $g );
 834         map { $contig->{$_} = $gst } @$g;
 835     }
 836
 837     # Now for each unmarked node in the graph, initialize an array
 838     # for possible group memberships.  We will use this later to
 839     # resolve potential conflicts.
 840     map { $contig->{$_} = [] unless $contig->{$_} } $graph->vertices;
 841     foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
 842         my $gst = wit_stringify( $g );  # This is the group name
 843         # Copy the graph, and delete all non-members from the new graph.
 844         my $part = $graph->copy;
 845         my @group_roots;
 846         $part->delete_vertices(
 847             grep { !ref( $contig->{$_} ) && $contig->{$_} ne $gst } $graph->vertices );
 848
 849         # Now look to see if our group is connected.
 850                 if( @$g > 1 ) {
 851                         # We have to take directionality into account.
 852                         # How many root nodes do we have?
 853                         my @roots = grep { ref( $contig->{$_} ) || $contig->{$_} eq $gst }
 854                                 $part->predecessorless_vertices;
 855                         # Assuming that @$g > 1, find the first root node that has at
 856                         # least one successor belonging to our group. If this reading
 857                         # is genealogical, there should be only one, but we will check
 858                         # that implicitly later.
 859                         foreach my $root ( @roots ) {
 860                                 # Prune the tree to get rid of extraneous hypotheticals.
 861                                 $root = _prune_subtree_old( $part, $root, $contig );
 862                                 next unless $root;
 863                                 # Save this root for our group.
 864                                 push( @group_roots, $root );
 865                                 # Get all the successor nodes of our root.
 866                         }
 867                 } else {
 868                         # Dispense with the trivial case of one reading.
 869                         my $wit = $g->[0];
 870                         @group_roots = ( $wit );
 871                         foreach my $v ( $part->vertices ) {
 872                                 $part->delete_vertex( $v ) unless $v eq $wit;
 873                         }
 874         }
 875
 876         if( @group_roots > 1 ) {
 877                 $conflict->{$gst} = 1;
 878                 $is_conflicted = 1;
 879         }
 880         # Paint the 'hypotheticals' with our group.
 881                 foreach my $wit ( $part->vertices ) {
 882                         if( ref( $contig->{$wit} ) ) {
 883                                 push( @{$contig->{$wit}}, $gst );
 884                         } elsif( $contig->{$wit} ne $gst ) {
 885                                 warn "How did we get here?";
 886                         }
 887                 }
 888
 889
 890                 # Save the relevant subgraph.
 891                 $subgraph->{$gst} = $part;
 892     }
 893
 894         # For each of our hypothetical readings, flatten its 'contig' array if
 895         # the array contains zero or one group.  If we have any unflattened arrays,
 896         # we may need to run the resolution process. If the reading is already known
 897         # to have a conflict, flatten the 'contig' array to nothing; we won't resolve
 898         # it.
 899         my @resolve;
 900         foreach my $wit ( keys %$contig ) {
 901                 next unless ref( $contig->{$wit} );
 902                 if( @{$contig->{$wit}} > 1 ) {
 903                         if( $is_conflicted ) {
 904                                 $contig->{$wit} = '';  # We aren't going to decide.
 905                         } else {
 906                                 push( @resolve, $wit );
 907                         }
 908                 } else {
 909                         my $gst = pop @{$contig->{$wit}};
 910                         $contig->{$wit} = $gst || '';
 911                 }
 912         }
 913
 914     if( @resolve ) {
 915         my $still_contig = {};
 916         foreach my $h ( @resolve ) {
 917             # For each of the hypothetical readings with more than one possibility,
 918             # try deleting it from each of its member subgraphs in turn, and see
 919             # if that breaks the contiguous grouping.
 920             # TODO This can still break in a corner case where group A can use
 921             # either vertex 1 or 2, and group B can use either vertex 2 or 1.
 922             # Revisit this if necessary; it could get brute-force nasty.
 923             foreach my $gst ( @{$contig->{$h}} ) {
 924                 my $gpart = $subgraph->{$gst}->copy();
 925                 # If we have come this far, there is only one root and everything
 926                 # is reachable from it.
 927                 my( $root ) = $gpart->predecessorless_vertices;
 928                 my $reachable = {};
 929                 map { $reachable->{$_} = 1 } $gpart->vertices;
 930
 931                 # Try deleting the hypothetical node.
 932                 $gpart->delete_vertex( $h );
 933                 if( $h eq $root ) {
 934                         # See if we still have a single root.
 935                         my @roots = $gpart->predecessorless_vertices;
 936                         warn "This shouldn't have happened" unless @roots;
 937                         if( @roots > 1 ) {
 938                                 # $h is needed by this group.
 939                                 if( exists( $still_contig->{$h} ) ) {
 940                                         # Conflict!
 941                                         $conflict->{$gst} = 1;
 942                                         $still_contig->{$h} = '';
 943                                 } else {
 944                                         $still_contig->{$h} = $gst;
 945                                 }
 946                         }
 947                 } else {
 948                         # $h is somewhere in the middle. See if everything
 949                         # else can still be reached from the root.
 950                                         my %still_reachable = ( $root => 1 );
 951                                         map { $still_reachable{$_} = 1 }
 952                                                 $gpart->all_successors( $root );
 953                                         foreach my $v ( keys %$reachable ) {
 954                                                 next if $v eq $h;
 955                                                 if( !$still_reachable{$v}
 956                                                         && ( $contig->{$v} eq $gst
 957                                                                  || ( exists $still_contig->{$v}
 958                                                                           && $still_contig->{$v} eq $gst ) ) ) {
 959                                                         # We need $h.
 960                                                         if( exists $still_contig->{$h} ) {
 961                                                                 # Conflict!
 962                                                                 $conflict->{$gst} = 1;
 963                                                                 $still_contig->{$h} = '';
 964                                                         } else {
 965                                                                 $still_contig->{$h} = $gst;
 966                                                         }
 967                                                         last;
 968                                                 } # else we don't need $h in this group.
 969                                         } # end foreach $v
 970                                 } # endif $h eq $root
 971             } # end foreach $gst
 972         } # end foreach $h
 973
 974         # Now we have some hypothetical vertices in $still_contig that are the
 975         # "real" group memberships.  Replace these in $contig.
 976                 foreach my $v ( keys %$contig ) {
 977                         next unless ref $contig->{$v};
 978                         $contig->{$v} = $still_contig->{$v};
 979                 }
 980     } # end if @resolve
 981
 982     my $is_genealogical = keys %$conflict ? JSON::false : JSON::true;
 983         my $variant_row = [ [], $is_genealogical ];
 984         # Fill in the groupings from $contig.
 985         foreach my $g ( @$groups ) {
 986         my $gst = wit_stringify( $g );
 987         my @realgroup = grep { $contig->{$_} eq $gst } keys %$contig;
 988         push( @{$variant_row->[0]}, \@realgroup );
 989     }
 990     return $variant_row;
 991 }
 992
 993 sub _prune_group {
 994         my( $group, $stemma, $graph ) = @_;
 995         my $lacunose = {};
 996         map { $lacunose->{$_} = 1 } $stemma->hypotheticals;
 997         map { $lacunose->{$_} = 0 } @$group;
 998         # Make our subgraph
 999         my $subgraph = $graph->copy;
1000         map { $subgraph->delete_vertex( $_ ) unless exists $lacunose->{$_} }
1001                 $subgraph->vertices;
1002         # ...and find the root.
1003         # Now prune and return the remaining vertices.
1004         _prune_subtree( $subgraph, $lacunose );
1005         return $subgraph->vertices;
1006 }
1007
1008 sub _prune_subtree {
1009         my( $tree, $lacunose ) = @_;
1010
1011         # Delete lacunose witnesses that have no successors
1012     my @orphan_hypotheticals;
1013     my $ctr = 0;
1014     do {
1015         die "Infinite loop on leaves" if $ctr > 100;
1016         @orphan_hypotheticals = grep { $lacunose->{$_} }
1017                 $tree->successorless_vertices;
1018         $tree->delete_vertices( @orphan_hypotheticals );
1019         $ctr++;
1020     } while( @orphan_hypotheticals );
1021
1022         # Delete lacunose roots that have a single successor
1023         my @redundant_root;
1024         $ctr = 0;
1025         do {
1026         die "Infinite loop on roots" if $ctr > 100;
1027                 @redundant_root = grep { $lacunose->{$_} && $tree->successors( $_ ) == 1 }
1028                         $tree->predecessorless_vertices;
1029                 $tree->delete_vertices( @redundant_root );
1030                 $ctr++;
1031         } while( @redundant_root );
1032 }
1033
1034 sub _prune_subtree_old {
1035     my( $tree, $root, $contighash ) = @_;
1036     # First, delete hypothetical leaves / orphans until there are none left.
1037     my @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
1038         $tree->successorless_vertices;
1039     while( @orphan_hypotheticals ) {
1040         $tree->delete_vertices( @orphan_hypotheticals );
1041         @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
1042             $tree->successorless_vertices;
1043     }
1044     # Then delete a hypothetical root with only one successor, moving the
1045     # root to the first child that has no other predecessors.
1046     while( $tree->successors( $root ) == 1 && ref $contighash->{$root} ) {
1047         my @nextroot = $tree->successors( $root );
1048         $tree->delete_vertex( $root );
1049         ( $root ) = grep { $tree->is_predecessorless_vertex( $_ ) } @nextroot;
1050     }
1051     # The tree has been modified in place, but we need to know the new root.
1052     $root = undef unless $root && $tree->has_vertex( $root );
1053     return $root;
1054 }
1055 # Add the variant, subject to a.c. representation logic.
1056 # This assumes that we will see the 'main' version before the a.c. version.
1057 sub add_variant_wit {
1058     my( $arr, $wit, $acstr ) = @_;
1059     my $skip;
1060     if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
1061         my $real = $1;
1062         $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr;
1063     }
1064     push( @$arr, $wit ) unless $skip;
1065 }
1066
1067 sub _useful_variant {
1068         my( $group_readings, $graph, $acstr ) = @_;
1069
1070         # TODO Decide what to do with AC witnesses
1071
1072         # Sort by group size and return
1073         my $is_useful = 0;
1074         my( @readings, @groups );   # The sorted groups for our answer.
1075         foreach my $rdg ( sort { @{$group_readings->{$b}} <=> @{$group_readings->{$a}} }
1076                 keys %$group_readings ) {
1077                 push( @readings, $rdg );
1078                 push( @groups, $group_readings->{$rdg} );
1079                 if( @{$group_readings->{$rdg}} > 1 ) {
1080                         $is_useful++;
1081                 } else {
1082                         my( $wit ) = @{$group_readings->{$rdg}};
1083                         $wit =~ s/^(.*)\Q$acstr\E$/$1/;
1084                         $is_useful++ unless( $graph->is_sink_vertex( $wit ) );
1085                 }
1086         }
1087         if( $is_useful > 1 ) {
1088                 return( \@readings, \@groups );
1089         } else {
1090                 return( [], [] );
1091         }
1092 }
1093
1094 =head2 wit_stringify( $groups )
1095
1096 Takes an array of witness groupings and produces a string like
1097 ['A','B'] / ['C','D','E'] / ['F']
1098
1099 =cut
1100
1101 sub wit_stringify {
1102     my $groups = shift;
1103     my @gst;
1104     # If we were passed an array of witnesses instead of an array of
1105     # groupings, then "group" the witnesses first.
1106     unless( ref( $groups->[0] ) ) {
1107         my $mkgrp = [ $groups ];
1108         $groups = $mkgrp;
1109     }
1110     foreach my $g ( @$groups ) {
1111         push( @gst, '[' . join( ',', map { "'$_'" } @$g ) . ']' );
1112     }
1113     return join( ' / ', @gst );
1114 }
1115
1116 sub _symmdiff {
1117         my( $lista, $listb ) = @_;
1118         my %union;
1119         my %scalars;
1120         map { $union{$_} = 1; $scalars{$_} = $_ } @$lista;
1121         map { $union{$_} += 1; $scalars{$_} = $_ } @$listb;
1122         my @set = grep { $union{$_} == 1 } keys %union;
1123         return map { $scalars{$_} } @set;
1124 }
1125
1126 1;
1127
1128 =head1 LICENSE
1129
1130 This package is free software and is provided "as is" without express
1131 or implied warranty.  You can redistribute it and/or modify it under
1132 the same terms as Perl itself.
1133
1134 =head1 AUTHOR
1135
1136 Tara L Andrews E<lt>aurum@cpan.orgE<gt>