X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FCollation.pm;h=022c2bc4e413d684f57444e18b2ac305a5171964;hb=db61303b93ba08ca97778858ee56ad31747dc308;hp=bd3b58df878abe0e57ef8262b8c3d1f1d52dface;hpb=6771a1b11e560c54e9cb8cbcb7c18200021894cf;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index bd3b58d..022c2bc 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -7,6 +7,7 @@ use File::Which; use Graph; use IPC::Run qw( run binary ); use Text::CSV; +use Text::Tradition::Collation::Data; use Text::Tradition::Collation::Reading; use Text::Tradition::Collation::RelationshipStore; use Text::Tradition::Error; @@ -15,28 +16,42 @@ use XML::LibXML; use XML::LibXML::XPathContext; use Moose; -has 'sequence' => ( - is => 'ro', - isa => 'Graph', - default => sub { Graph->new() }, - handles => { - paths => 'edges', - }, - ); - -has 'relations' => ( - is => 'ro', - isa => 'Text::Tradition::Collation::RelationshipStore', - handles => { - relationships => 'relationships', - related_readings => 'related_readings', - get_relationship => 'get_relationship', - del_relationship => 'del_relationship', - equivalence => 'equivalence', - equivalence_graph => 'equivalence_graph', - }, - writer => '_set_relations', - ); +has _data => ( + isa => 'Text::Tradition::Collation::Data', + is => 'ro', + required => 1, + handles => [ qw( + sequence + paths + _set_relations + relations + _set_start + _set_end + ac_label + has_cached_table + relationships + related_readings + get_relationship + del_relationship + equivalence + equivalence_graph + readings + reading + _add_reading + del_reading + has_reading + wit_list_separator + baselabel + linear + wordsep + start + end + cached_table + _graphcalc_done + has_cached_svg + wipe_table + )] +); has 'tradition' => ( is => 'ro', @@ -45,83 +60,6 @@ has 'tradition' => ( weak_ref => 1, ); -has 'readings' => ( - isa => 'HashRef[Text::Tradition::Collation::Reading]', - traits => ['Hash'], - handles => { - reading => 'get', - _add_reading => 'set', - del_reading => 'delete', - has_reading => 'exists', - readings => 'values', - }, - default => sub { {} }, - ); - -has 'wit_list_separator' => ( - is => 'rw', - isa => 'Str', - default => ', ', - ); - -has 'baselabel' => ( - is => 'rw', - isa => 'Str', - default => 'base text', - ); - -has 'linear' => ( - is => 'rw', - isa => 'Bool', - default => 1, - ); - -has 'ac_label' => ( - is => 'rw', - isa => 'Str', - default => ' (a.c.)', - ); - -has 'wordsep' => ( - is => 'rw', - isa => 'Str', - default => ' ', - ); - -has 'start' => ( - is => 'ro', - isa => 'Text::Tradition::Collation::Reading', - writer => '_set_start', - weak_ref => 1, - ); - -has 'end' => ( - is => 'ro', - isa => 'Text::Tradition::Collation::Reading', - writer => '_set_end', - weak_ref => 1, - ); - -has 'cached_svg' => ( - is => 'rw', - isa => 'Str', - predicate => 'has_cached_svg', - clearer => 'wipe_svg', - ); - -has 'cached_table' => ( - is => 'rw', - isa => 'HashRef', - predicate => 'has_cached_table', - clearer => 'wipe_table', - ); - -has '_graphcalc_done' => ( - is => 'rw', - isa => 'Bool', - default => undef, - ); - =head1 NAME Text::Tradition::Collation - a software model for a text collation @@ -272,6 +210,20 @@ See L for the available options. =cut +sub BUILDARGS { + my ( $class, @args ) = @_; + my %args = @args == 1 ? %{ $args[0] } : @args; + # TODO determine these from the Moose::Meta object + my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel + linear wordsep start end cached_table _graphcalc_done); + my %data_args; + for my $attr (@delegate_attrs) { + $data_args{$attr} = delete $args{$attr} if exists $args{$attr}; + } + $args{_data} = Text::Tradition::Collation::Data->new(%data_args); + return \%args; +} + sub BUILD { my $self = shift; $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) ); @@ -417,6 +369,8 @@ sub merge_readings { $joinstr = $self->wordsep unless defined $joinstr; } $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) ); + # Change this reading to a joining one if necessary + $kept_obj->_set_join_next( $del_obj->join_next ); $kept_obj->normal_form( join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) ); # Combine the lexemes present in the readings @@ -446,6 +400,15 @@ sub compress_readings { # readings. my %gobbled; foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) { + # While we are here, get rid of any extra wordforms from a disambiguated + # reading. + if( $rdg->disambiguated ) { + foreach my $lex ( $rdg->lexemes ) { + $lex->clear_matching_forms(); + $lex->add_matching_form( $lex->form ); + } + } + # Now look for readings that can be joined to their successors. next if $rdg->is_meta; next if $gobbled{$rdg->id}; next if $rdg->grammar_invalid || $rdg->is_nonsense; @@ -639,22 +602,17 @@ sub as_svg { my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'}; $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear ); - if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) { - my @cmd = qw/dot -Tsvg/; - my( $svg, $err ); - my $dotfile = File::Temp->new(); - ## USE FOR DEBUGGING - # $dotfile->unlink_on_destroy(0); - binmode $dotfile, ':utf8'; - print $dotfile $self->as_dot( $opts ); - push( @cmd, $dotfile->filename ); - run( \@cmd, ">", binary(), \$svg ); - $svg = decode_utf8( $svg ); - $self->cached_svg( $svg ) unless $want_subgraph; - return $svg; - } else { - return $self->cached_svg; - } + my @cmd = qw/dot -Tsvg/; + my( $svg, $err ); + my $dotfile = File::Temp->new(); + ## USE FOR DEBUGGING + # $dotfile->unlink_on_destroy(0); + binmode $dotfile, ':utf8'; + print $dotfile $self->as_dot( $opts ); + push( @cmd, $dotfile->filename ); + run( \@cmd, ">", binary(), \$svg ); + $svg = decode_utf8( $svg ); + return $svg; } @@ -971,13 +929,23 @@ is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all read is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" ); is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" ); -# Now add a stemma, write to GraphML, and parse again. +# Now add a stemma, write to GraphML, and look at the output. my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' ); is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" ); is( $tradition->stemmata, 1, "Tradition now has the stemma" ); $graphml = $c->as_graphml; like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" ); +# Now add a user, write to GraphML, and look at the output. +unlike( $graphml, qr/testuser/, "Test user name does not exist in GraphML yet" ); +my $testuser = Text::Tradition::User->new( + id => 'testuser', password => 'testpass' ); +is( ref( $testuser ), 'Text::Tradition::User', "Created test user object" ); +$testuser->add_tradition( $tradition ); +is( $tradition->user->id, $testuser->id, "Tradition assigned to test user" ); +$graphml = $c->as_graphml; +like( $graphml, qr/testuser/, "Test user name now exists in GraphML" ); + =end testing =cut @@ -1009,6 +977,7 @@ sub as_graphml { 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd'; # Create the document and root node + require XML::LibXML; my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" ); my $root = $graphml->createElementNS( $graphml_ns, 'graphml' ); $graphml->setDocumentElement( $root ); @@ -1026,11 +995,6 @@ sub as_graphml { 'RelationshipScope' => 'string', ); - # List of attribute names *not* to save on our objects. - # We will also not save any attribute beginning with _. - my %skipsave; - map { $skipsave{$_} = 1 } qw/ cached_svg /; - # Add the data keys for the graph. Include an extra key 'version' for the # GraphML output version. my %graph_data_keys; @@ -1044,18 +1008,28 @@ sub as_graphml { map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes; foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) { next if $attr->name =~ /^_/; - next if $skipsave{$attr->name}; next unless $save_types{$attr->type_constraint->name}; $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name}; } - # Extra custom key for the tradition stemma(ta) - $graph_attributes{'stemmata'} = 'string'; + # Extra custom keys for complex objects that should be saved in some form. + # The subroutine should return a string, or undef/empty. + $graph_attributes{'stemmata'} = sub { + my @stemstrs; + map { push( @stemstrs, $_->editable( {linesep => ''} ) ) } + $self->tradition->stemmata; + join( "\n", @stemstrs ); + }; + $graph_attributes{'user'} = sub { + $self->tradition->user ? $self->tradition->user->id : undef + }; foreach my $datum ( sort keys %graph_attributes ) { $graph_data_keys{$datum} = 'dg'.$gdi++; my $key = $root->addNewChild( $graphml_ns, 'key' ); + my $dtype = ref( $graph_attributes{$datum} ) ? 'string' + : $graph_attributes{$datum}; $key->setAttribute( 'attr.name', $datum ); - $key->setAttribute( 'attr.type', $graph_attributes{$datum} ); + $key->setAttribute( 'attr.type', $dtype ); $key->setAttribute( 'for', 'graph' ); $key->setAttribute( 'id', $graph_data_keys{$datum} ); } @@ -1065,7 +1039,6 @@ sub as_graphml { my $rmeta = Text::Tradition::Collation::Reading->meta; foreach my $attr( $rmeta->get_all_attributes ) { next if $attr->name =~ /^_/; - next if $skipsave{$attr->name}; next unless $save_types{$attr->type_constraint->name}; $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name}; } @@ -1095,7 +1068,6 @@ sub as_graphml { my $pmeta = Text::Tradition::Collation::Relationship->meta; foreach my $attr( $pmeta->get_all_attributes ) { next if $attr->name =~ /^_/; - next if $skipsave{$attr->name}; next unless $save_types{$attr->type_constraint->name}; $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name}; } @@ -1128,11 +1100,9 @@ sub as_graphml { my $value; if( $datum eq 'version' ) { $value = '3.2'; - } elsif( $datum eq 'stemmata' ) { - my @stemstrs; - map { push( @stemstrs, $_->editable( {linesep => ''} ) ) } - $self->tradition->stemmata; - $value = join( "\n", @stemstrs ); + } elsif( ref( $graph_attributes{$datum} ) ) { + my $sub = $graph_attributes{$datum}; + $value = &$sub(); } elsif( $gattr_from{$datum} eq 'Tradition' ) { $value = $self->tradition->$datum; } else { @@ -1311,8 +1281,6 @@ sub _make_witness_row { map { $char_hash{$_} = undef } @$positions; my $debug = 0; foreach my $rdg ( @$path ) { - my $rtext = $rdg->text; - $rtext = '#LACUNA#' if $rdg->is_lacuna; say STDERR "rank " . $rdg->rank if $debug; # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank; $char_hash{$rdg->rank} = { 't' => $rdg }; @@ -1622,7 +1590,7 @@ sub calculate_ranks { } } # Do we need to invalidate the cached data? - if( $self->has_cached_svg || $self->has_cached_table ) { + if( $self->has_cached_table ) { foreach my $r ( $self->readings ) { next if defined( $existing_ranks{$r} ) && $existing_ranks{$r} == $r->rank; @@ -1639,7 +1607,6 @@ sub calculate_ranks { sub _clear_cache { my $self = shift; - $self->wipe_svg if $self->has_cached_svg; $self->wipe_table if $self->has_cached_table; }