X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FCollation.pm;h=022c2bc4e413d684f57444e18b2ac305a5171964;hb=db61303b93ba08ca97778858ee56ad31747dc308;hp=5b3cd96c2c168abd51cba4998350d1153f7a75f7;hpb=9fef629bd3a741a6d74d130f10056898d504fb47;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 5b3cd96..022c2bc 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -7,6 +7,7 @@ use File::Which; use Graph; use IPC::Run qw( run binary ); use Text::CSV; +use Text::Tradition::Collation::Data; use Text::Tradition::Collation::Reading; use Text::Tradition::Collation::RelationshipStore; use Text::Tradition::Error; @@ -15,28 +16,42 @@ use XML::LibXML; use XML::LibXML::XPathContext; use Moose; -has 'sequence' => ( - is => 'ro', - isa => 'Graph', - default => sub { Graph->new() }, - handles => { - paths => 'edges', - }, - ); - -has 'relations' => ( - is => 'ro', - isa => 'Text::Tradition::Collation::RelationshipStore', - handles => { - relationships => 'relationships', - related_readings => 'related_readings', - get_relationship => 'get_relationship', - del_relationship => 'del_relationship', - equivalence => 'equivalence', - equivalence_graph => 'equivalence_graph', - }, - writer => '_set_relations', - ); +has _data => ( + isa => 'Text::Tradition::Collation::Data', + is => 'ro', + required => 1, + handles => [ qw( + sequence + paths + _set_relations + relations + _set_start + _set_end + ac_label + has_cached_table + relationships + related_readings + get_relationship + del_relationship + equivalence + equivalence_graph + readings + reading + _add_reading + del_reading + has_reading + wit_list_separator + baselabel + linear + wordsep + start + end + cached_table + _graphcalc_done + has_cached_svg + wipe_table + )] +); has 'tradition' => ( is => 'ro', @@ -45,83 +60,6 @@ has 'tradition' => ( weak_ref => 1, ); -has 'readings' => ( - isa => 'HashRef[Text::Tradition::Collation::Reading]', - traits => ['Hash'], - handles => { - reading => 'get', - _add_reading => 'set', - del_reading => 'delete', - has_reading => 'exists', - readings => 'values', - }, - default => sub { {} }, - ); - -has 'wit_list_separator' => ( - is => 'rw', - isa => 'Str', - default => ', ', - ); - -has 'baselabel' => ( - is => 'rw', - isa => 'Str', - default => 'base text', - ); - -has 'linear' => ( - is => 'rw', - isa => 'Bool', - default => 1, - ); - -has 'ac_label' => ( - is => 'rw', - isa => 'Str', - default => ' (a.c.)', - ); - -has 'wordsep' => ( - is => 'rw', - isa => 'Str', - default => ' ', - ); - -has 'start' => ( - is => 'ro', - isa => 'Text::Tradition::Collation::Reading', - writer => '_set_start', - weak_ref => 1, - ); - -has 'end' => ( - is => 'ro', - isa => 'Text::Tradition::Collation::Reading', - writer => '_set_end', - weak_ref => 1, - ); - -has 'cached_svg' => ( - is => 'rw', - isa => 'Str', - predicate => 'has_cached_svg', - clearer => 'wipe_svg', - ); - -has 'cached_table' => ( - is => 'rw', - isa => 'HashRef', - predicate => 'has_cached_table', - clearer => 'wipe_table', - ); - -has '_graphcalc_done' => ( - is => 'rw', - isa => 'Bool', - default => undef, - ); - =head1 NAME Text::Tradition::Collation - a software model for a text collation @@ -272,6 +210,20 @@ See L for the available options. =cut +sub BUILDARGS { + my ( $class, @args ) = @_; + my %args = @args == 1 ? %{ $args[0] } : @args; + # TODO determine these from the Moose::Meta object + my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel + linear wordsep start end cached_table _graphcalc_done); + my %data_args; + for my $attr (@delegate_attrs) { + $data_args{$attr} = delete $args{$attr} if exists $args{$attr}; + } + $args{_data} = Text::Tradition::Collation::Data->new(%data_args); + return \%args; +} + sub BUILD { my $self = shift; $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) ); @@ -448,6 +400,15 @@ sub compress_readings { # readings. my %gobbled; foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) { + # While we are here, get rid of any extra wordforms from a disambiguated + # reading. + if( $rdg->disambiguated ) { + foreach my $lex ( $rdg->lexemes ) { + $lex->clear_matching_forms(); + $lex->add_matching_form( $lex->form ); + } + } + # Now look for readings that can be joined to their successors. next if $rdg->is_meta; next if $gobbled{$rdg->id}; next if $rdg->grammar_invalid || $rdg->is_nonsense; @@ -641,22 +602,17 @@ sub as_svg { my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'}; $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear ); - if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) { - my @cmd = qw/dot -Tsvg/; - my( $svg, $err ); - my $dotfile = File::Temp->new(); - ## USE FOR DEBUGGING - # $dotfile->unlink_on_destroy(0); - binmode $dotfile, ':utf8'; - print $dotfile $self->as_dot( $opts ); - push( @cmd, $dotfile->filename ); - run( \@cmd, ">", binary(), \$svg ); - $svg = decode_utf8( $svg ); - $self->cached_svg( $svg ) unless $want_subgraph; - return $svg; - } else { - return $self->cached_svg; - } + my @cmd = qw/dot -Tsvg/; + my( $svg, $err ); + my $dotfile = File::Temp->new(); + ## USE FOR DEBUGGING + # $dotfile->unlink_on_destroy(0); + binmode $dotfile, ':utf8'; + print $dotfile $self->as_dot( $opts ); + push( @cmd, $dotfile->filename ); + run( \@cmd, ">", binary(), \$svg ); + $svg = decode_utf8( $svg ); + return $svg; } @@ -1021,6 +977,7 @@ sub as_graphml { 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd'; # Create the document and root node + require XML::LibXML; my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" ); my $root = $graphml->createElementNS( $graphml_ns, 'graphml' ); $graphml->setDocumentElement( $root ); @@ -1038,11 +995,6 @@ sub as_graphml { 'RelationshipScope' => 'string', ); - # List of attribute names *not* to save on our objects. - # We will also not save any attribute beginning with _. - my %skipsave; - map { $skipsave{$_} = 1 } qw/ cached_svg /; - # Add the data keys for the graph. Include an extra key 'version' for the # GraphML output version. my %graph_data_keys; @@ -1056,7 +1008,6 @@ sub as_graphml { map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes; foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) { next if $attr->name =~ /^_/; - next if $skipsave{$attr->name}; next unless $save_types{$attr->type_constraint->name}; $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name}; } @@ -1088,7 +1039,6 @@ sub as_graphml { my $rmeta = Text::Tradition::Collation::Reading->meta; foreach my $attr( $rmeta->get_all_attributes ) { next if $attr->name =~ /^_/; - next if $skipsave{$attr->name}; next unless $save_types{$attr->type_constraint->name}; $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name}; } @@ -1118,7 +1068,6 @@ sub as_graphml { my $pmeta = Text::Tradition::Collation::Relationship->meta; foreach my $attr( $pmeta->get_all_attributes ) { next if $attr->name =~ /^_/; - next if $skipsave{$attr->name}; next unless $save_types{$attr->type_constraint->name}; $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name}; } @@ -1332,8 +1281,6 @@ sub _make_witness_row { map { $char_hash{$_} = undef } @$positions; my $debug = 0; foreach my $rdg ( @$path ) { - my $rtext = $rdg->text; - $rtext = '#LACUNA#' if $rdg->is_lacuna; say STDERR "rank " . $rdg->rank if $debug; # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank; $char_hash{$rdg->rank} = { 't' => $rdg }; @@ -1643,7 +1590,7 @@ sub calculate_ranks { } } # Do we need to invalidate the cached data? - if( $self->has_cached_svg || $self->has_cached_table ) { + if( $self->has_cached_table ) { foreach my $r ( $self->readings ) { next if defined( $existing_ranks{$r} ) && $existing_ranks{$r} == $r->rank; @@ -1660,7 +1607,6 @@ sub calculate_ranks { sub _clear_cache { my $self = shift; - $self->wipe_svg if $self->has_cached_svg; $self->wipe_table if $self->has_cached_table; }