From: Tara L Andrews Date: Thu, 22 Dec 2011 13:30:31 +0000 (+0100) Subject: stop saving duplicate path arrays in witnesses; get rid of relationship X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b74d89f9e926466ba4ded77746fd0f98912cc17a;p=scpubgit%2Fstemmatology.git stop saving duplicate path arrays in witnesses; get rid of relationship vectors that we don't use --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 236a9bd..5f94dfb 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -438,6 +438,7 @@ sub as_graphml { _add_graphml_data( $node_el, $node_data_keys{'rank'}, $n->rank ) if $n->has_rank; _add_graphml_data( $node_el, $node_data_keys{'class'}, $n->sub_class ); + $DB::single = 1 if $n->has_primary && $n->primary ne $n; _add_graphml_data( $node_el, $node_data_keys{'identical'}, $n->primary->name ) if $n->has_primary && $n->primary ne $n; } @@ -530,11 +531,14 @@ sub make_alignment_table { my @all_pos = ( 0 .. $self->end->rank - 1 ); foreach my $wit ( $self->tradition->witnesses ) { # print STDERR "Making witness row(s) for " . $wit->sigil . "\n"; - my @row = _make_witness_row( $wit->path, \@all_pos, $noderefs ); + my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil ); + my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs ); unshift( @row, $wit->sigil ); push( @$table, \@row ); - if( $wit->has_ante_corr ) { - my @ac_row = _make_witness_row( $wit->uncorrected_path, \@all_pos, $noderefs ); + if( $wit->is_layered ) { + my @wit_ac_path = $self->reading_sequence( $self->start, $self->end, + $wit->sigil.$self->ac_label, $wit->sigil ); + my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs ); unshift( @ac_row, $wit->sigil . $self->ac_label ); push( @$table, \@ac_row ); } @@ -861,69 +865,12 @@ sub _is_within { ## INITIALIZATION METHODS - for use by parsers -# Walk the paths for each witness in the graph, and return the nodes -# that the graph has in common. If $using_base is true, some -# different logic is needed. -# NOTE This does not create paths; it merely finds common readings. - -sub walk_witness_paths { - my( $self ) = @_; - # For each witness, walk the path through the graph. - # Then we need to find the common nodes. - # TODO This method is going to fall down if we have a very gappy - # text in the collation. - my $paths = {}; - my @common_readings; - foreach my $wit ( $self->tradition->witnesses ) { - my $curr_reading = $self->start; - my @wit_path = $self->reading_sequence( $self->start, $self->end, - $wit->sigil ); - $wit->path( \@wit_path ); - - # Detect the common readings. - @common_readings = _find_common( \@common_readings, \@wit_path ); - } - - # Mark all the nodes as either common or not. - foreach my $cn ( @common_readings ) { - print STDERR "Setting " . $cn->name . " / " . $cn->label - . " as common node\n"; - $cn->make_common; - } - foreach my $n ( $self->readings() ) { - $n->make_variant unless $n->is_common; - } - # Return an array of the common nodes in order. - return @common_readings; -} - -sub _find_common { - my( $common_readings, $new_path ) = @_; - my @cr; - if( @$common_readings ) { - foreach my $n ( @$new_path ) { - push( @cr, $n ) if grep { $_ eq $n } @$common_readings; - } - } else { - push( @cr, @$new_path ); - } - return @cr; -} - -sub _remove_common { - my( $common_readings, $divergence ) = @_; - my @cr; - my %diverged; - map { $diverged{$_->name} = 1 } @$divergence; - foreach( @$common_readings ) { - push( @cr, $_ ) unless $diverged{$_->name}; - } - return @cr; -} - # For use when a collation is constructed from a base text and an apparatus. # We have the sequences of readings and just need to add path edges. +# When we are done, clear out the witness path attributes, as they are no +# longer needed. +# TODO Find a way to replace the witness path attributes with encapsulated functions? sub make_witness_paths { my( $self ) = @_; @@ -940,7 +887,7 @@ sub make_witness_path { foreach my $idx ( 0 .. $#chain-1 ) { $self->add_path( $chain[$idx], $chain[$idx+1], $sig ); } - if( $wit->has_ante_corr ) { + if( $wit->is_layered ) { @chain = @{$wit->uncorrected_path}; foreach my $idx( 0 .. $#chain-1 ) { my $source = $chain[$idx]; @@ -949,6 +896,8 @@ sub make_witness_path { unless $self->has_path( $source, $target, $sig ); } } + $wit->clear_path; + $wit->clear_uncorrected_path; } sub calculate_ranks { diff --git a/lib/Text/Tradition/Collation/Relationship.pm b/lib/Text/Tradition/Collation/Relationship.pm index c2ad3ae..9c6e80e 100644 --- a/lib/Text/Tradition/Collation/Relationship.pm +++ b/lib/Text/Tradition/Collation/Relationship.pm @@ -12,19 +12,6 @@ extends 'Graph::Easy::Edge'; enum 'RelationshipType' => qw( spelling orthographic grammatical repetition lexical ); -subtype 'RelationshipVector', - => as 'ArrayRef', - => where { @$_ == 2 - && $_->[0]->isa( 'Graph::Easy::Node' ) - && $_->[1]->isa( 'Graph::Easy::Node' ) - }, - message { 'Argument should be [ SourceReading, TargetReading ]' }; - -subtype 'RelationshipTokenVector', - => as 'ArrayRef', - => where { @$_ == 2 }, - message { 'Argument should be [ \'source\', \'target\' ]' }; - no Moose::Util::TypeConstraints; ## see comment above has 'type' => ( @@ -33,17 +20,6 @@ has 'type' => ( required => 1, ); -has 'this_relation' => ( - is => 'rw', - isa => 'RelationshipVector', - required => 1, -); - -has 'primary_relation' => ( - is => 'rw', - isa => 'RelationshipTokenVector', -); - has 'global' => ( is => 'rw', isa => 'Bool', @@ -82,10 +58,6 @@ sub BUILD { $self->set_attribute( 'class', 'relationship' ); - unless( $self->primary_relation ) { - $self->primary_relation( [ $self->this_relation->[0]->label, - $self->this_relation->[1]->label ] ); - } } no Moose; diff --git a/lib/Text/Tradition/Parser/CollateX.pm b/lib/Text/Tradition/Parser/CollateX.pm index 62123fe..f2dac33 100644 --- a/lib/Text/Tradition/Parser/CollateX.pm +++ b/lib/Text/Tradition/Parser/CollateX.pm @@ -2,7 +2,7 @@ package Text::Tradition::Parser::CollateX; use strict; use warnings; -use Text::Tradition::Parser::GraphML qw/ graphml_parse populate_witness_path /; +use Text::Tradition::Parser::GraphML qw/ graphml_parse /; =head1 NAME @@ -163,9 +163,6 @@ sub parse { } } - # Set the $witness->path arrays for each wit. - populate_witness_path( $tradition ); - # Rank the readings. $collation->calculate_ranks() if $collation->linear; } diff --git a/lib/Text/Tradition/Parser/GraphML.pm b/lib/Text/Tradition/Parser/GraphML.pm index 9fe2911..fbc0d3a 100644 --- a/lib/Text/Tradition/Parser/GraphML.pm +++ b/lib/Text/Tradition/Parser/GraphML.pm @@ -8,7 +8,7 @@ use vars qw/ @EXPORT_OK $xpc /; use XML::LibXML; use XML::LibXML::XPathContext; -@EXPORT_OK = qw/ graphml_parse populate_witness_path /; +@EXPORT_OK = qw/ graphml_parse /; =head1 NAME @@ -130,28 +130,6 @@ sub graphml_parse { return $graph_hash; } -=head2 B( $tradition ) - -Given a tradition, populate the 'path' and 'uncorrected_path' attributes -of all of its witnesses. Useful for all formats based on the graph itself. - -=cut - -sub populate_witness_path { - my ( $tradition, $ante_corr ) = @_; - my $c = $tradition->collation; - print STDERR "Walking paths for witnesses\n"; - foreach my $wit ( $tradition->witnesses ) { - my @path = $c->reading_sequence( $c->start, $c->end, $wit->sigil ); - $wit->path( \@path ); - if( $ante_corr->{$wit->sigil} ) { - # Get the uncorrected path too - my @uc = $c->reading_sequence( $c->start, $c->end, - $wit->sigil . $c->ac_label, $wit->sigil ); - $wit->uncorrected_path( \@uc ); - } - } -} sub _lookup_node_data { my( $xmlnode, $key ) = @_; diff --git a/lib/Text/Tradition/Parser/Self.pm b/lib/Text/Tradition/Parser/Self.pm index a34c9f5..7bc2c6d 100644 --- a/lib/Text/Tradition/Parser/Self.pm +++ b/lib/Text/Tradition/Parser/Self.pm @@ -2,7 +2,7 @@ package Text::Tradition::Parser::Self; use strict; use warnings; -use Text::Tradition::Parser::GraphML qw/ graphml_parse populate_witness_path /; +use Text::Tradition::Parser::GraphML qw/ graphml_parse /; =head1 NAME @@ -171,7 +171,6 @@ sub parse { # Now add the edges. print STDERR "Adding graph edges\n"; - my $has_ante_corr = {}; foreach my $e ( @{$graph_data->{'edges'}} ) { my $from = $e->{$SOURCE_KEY}; my $to = $e->{$TARGET_KEY}; @@ -190,7 +189,7 @@ sub parse { $tradition->add_witness( sigil => $wit ); $witnesses{$wit} = 1; } - $has_ante_corr->{$wit} = 1 if $extra; + $tradition->witness( $wit )->is_layered( 1 ) if $extra; } elsif( $class eq 'relationship' ) { # We need the metadata about the relationship. my $opts = { 'type' => $e->{$RELATIONSHIP_KEY} }; @@ -222,9 +221,6 @@ sub parse { } } } - - # Set the $witness->path arrays for each wit. - populate_witness_path( $tradition, $has_ante_corr ); } 1; diff --git a/lib/Text/Tradition/Parser/TEI.pm b/lib/Text/Tradition/Parser/TEI.pm index db18da6..6eb5541 100644 --- a/lib/Text/Tradition/Parser/TEI.pm +++ b/lib/Text/Tradition/Parser/TEI.pm @@ -158,7 +158,6 @@ sub parse { $c->add_path( $source, $rdg, $sig ); $source = $rdg; } - $tradition->witness( $sig )->path( \@real_sequence ); # See if we need to make an a.c. version of the witness. if( exists $app_ac->{$sig} ) { my @uncorrected; @@ -180,7 +179,7 @@ sub parse { $source = $rdg; } print STDERR "Adding a.c. version for witness $sig\n"; - $tradition->witness( $sig )->uncorrected_path( \@uncorrected ); + $tradition->witness( $sig )->is_layered( 1 ); } } diff --git a/lib/Text/Tradition/Witness.pm b/lib/Text/Tradition/Witness.pm index 5010ffe..dc38c05 100644 --- a/lib/Text/Tradition/Witness.pm +++ b/lib/Text/Tradition/Witness.pm @@ -65,15 +65,10 @@ Accessor method for the witness identifier. Accessor method for the general witness description. -=head2 path +=head2 is_layered -An array of Text::Tradition::Collation::Reading objects which, taken in -sequence, represent the text. - -=head2 uncorrected_path - -An array of Text::Tradition::Collation::Reading objects which, taken in -sequence, represent the text before any scribal corrections were made. +Boolean method to note whether the witness has layers (e.g. pre-correction +readings) in the collation. =begin testing @@ -118,18 +113,24 @@ has 'source' => ( predicate => 'has_source', ); -# Path. This is an array of Reading nodes that should mirror the -# text above. +# Path. This is an array of Reading nodes that can be saved during +# initialization, but should be cleared before saving in a DB. has 'path' => ( is => 'rw', isa => 'ArrayRef[Text::Tradition::Collation::Reading]', predicate => 'has_path', + clearer => 'clear_path', ); has 'uncorrected_path' => ( is => 'rw', isa => 'ArrayRef[Text::Tradition::Collation::Reading]', - predicate => 'has_ante_corr', + clearer => 'clear_uncorrected_path', + ); + +has 'is_layered' => ( + is => 'rw', + isa => 'Bool', ); # Manuscript name or similar @@ -144,6 +145,14 @@ has 'other_info' => ( isa => 'Str', ); +# If we set an uncorrected path, ever, remember that we did so. +around 'uncorrected_path' => sub { + my $orig = shift; + my $self = shift; + + $self->is_layered( 1 ); + $self->$orig( @_ ); +}; sub BUILD { my $self = shift; @@ -202,6 +211,7 @@ around text => sub { $self->$orig( @_ ); }; + no Moose; __PACKAGE__->meta->make_immutable; diff --git a/script/save_to_db.pl b/script/save_to_db.pl index 45ebb17..497c723 100644 --- a/script/save_to_db.pl +++ b/script/save_to_db.pl @@ -36,24 +36,27 @@ if( $dir ) { opendir( DIR, $dir ) or die "Could not open directory $dir"; while( readdir DIR ) { next unless /\.xml$/; - my $stemmafile = "$dir/" . $stemma_map{$_}; + print STDERR "Looking at $_\n"; my $tradition = Text::Tradition->new( 'input' => 'Self', 'file' => "$dir/$_", 'linear' => 1, ); - open my $stemma_fh, '<', $stemmafile or die "Could not read stemma file $stemmafile"; - my $stemma = Text::Tradition::Stemma->new( - 'collation' => $tradition->collation, - 'dot' => $stemma_fh, - ); + my $stemma; + if( exists $stemma_map{$_} ) { + my $stemmafile = "$dir/" . $stemma_map{$_}; + open my $stemma_fh, '<', $stemmafile or die "Could not read stemma file $stemmafile"; + $stemma = Text::Tradition::Stemma->new( + 'collation' => $tradition->collation, + 'dot' => $stemma_fh, + ); + } my $scope = $kdb->new_scope; my $tid = $kdb->store( $tradition ); - my $sid = $kdb->store( $stemma ); - - print STDERR "Stored tradition and stemma for " . $tradition->name - . ", got $tid / $sid as the ref\n"; + my $sid = $kdb->store( $stemma ) if $stemma; + print STDERR "Stored tradition for " . $tradition->name . " at $tid\n"; + print STDERR "\tand stemma at $sid\n" if $stemma; } }