From: Tara L Andrews Date: Sun, 15 May 2011 22:06:09 +0000 (+0200) Subject: make some progress, also make perl -cw work X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d047cd5205779949e2d2c2d6f0e99077c5dc9c94;p=scpubgit%2Fstemmatology.git make some progress, also make perl -cw work --- diff --git a/lib/Text/Tradition.pm b/lib/Text/Tradition.pm index d376c86..a156c0d 100644 --- a/lib/Text/Tradition.pm +++ b/lib/Text/Tradition.pm @@ -29,8 +29,8 @@ around BUILDARGS => sub { # Now @_ contains the original constructor args. Make a # collation argument and a witnesses argument. my %init_args = @_; - my %member_objects = { 'collation' => undef, - 'witnesses' => [] }; + my %member_objects = ( 'collation' => undef, + 'witnesses' => [] ); if( exists $init_args{'witnesses'} ) { # We got passed an uncollated list of witnesses. Make a @@ -58,7 +58,7 @@ around BUILDARGS => sub { $member_objects{'collation'} = Text::Tradition::Collation->new( %init_args ); @{$member_objects{'witnesses'}} = - $member_objects->{'collation'}->create_witnesses(); + $member_objects{'collation'}->create_witnesses(); } return $class->$orig( %member_objects ); diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 3d6b612..fe1570b 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -1,25 +1,27 @@ package Text::Tradition::Collation; + +use Graph::Easy; use Moose; has 'graph' => ( - is => 'ro', - isa => 'Graph::Easy', - writer => '_init_graph', - handles => { - add_node => 'add_reading', - del_node => 'del_reading', - add_edge => 'add_path', - del_edge => 'del_path', - nodes => 'readings', - edges => 'paths', - ); + is => 'ro', + isa => 'Graph::Easy', + handles => { + add_node => 'add_reading', + del_node => 'del_reading', + add_edge => 'add_path', + del_edge => 'del_path', + nodes => 'readings', + edges => 'paths', + }, + default => sub { Graph::Easy->new( undirected => 0 ) }, + ); -# TODO do we not have a way to access the parent object? has 'tradition' => ( - is => 'ro', - isa => 'Text::Tradition', - ); + is => 'ro', + isa => 'Text::Tradition', + ); # The collation can be created two ways: # 1. Collate a set of witnesses (with CollateX I guess) and process @@ -36,7 +38,53 @@ has 'tradition' => ( # constructor will also need to make the witness objects, if we didn't # come through option 1. -# TODO BUILDARGS +sub BUILD { + my( $self, $args ) = @_; + + # Call the appropriate parser on the given data + my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %$args ); + my $format = shift( @formats ); + unless( $format ) { + warn "No data given to create a graph; will initialize an empty one"; + } + if( $format =~ /^(CSV|CTE)$/ && !exists $args->{'base'} ) { + warn "Cannot make a graph from $format without a base text"; + return; + } + + # Initialize our graph object. + $self->graph->set_attribute( 'node', 'shape', 'ellipse' ); + # Starting point for all texts + my $last_node = $self->graph->add_node( '#START#' ); + + # Now do the parsing. + my @sigla; + if( $format ) { + my @parseargs; + if( $format =~ /^(CSV|CTE)$/ ) { + @parseargs = ( 'base' => $args->{'base'}, + 'data' => $args->{$format}, + 'format' => $format ); + $format = 'BaseText'; + } else { + @parseargs = ( $args->{ $format } ); + } + my $mod = "Text::Tradition::Parser::$format"; + load( $mod ); + # TODO parse needs to return witness IDs + @sigla = $mod->can('parse')->( $self->graph, @parseargs ); + } + + # Do we need to initialize the witnesses? + unless( $args->{'have_witnesses'} ) { + # initialize Witness objects for all our witnesses + my @witnesses; + foreach my $sigil ( @sigla ) { + push( @witnesses, Text::Tradition::Witness->new( 'sigil' => $sigil ) ); + } + $self->tradition->witnesses( \@witnesses ); + } +} # Wrappers around some methods diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm index 54cd005..86b2869 100644 --- a/lib/Text/Tradition/Collation/Reading.pm +++ b/lib/Text/Tradition/Collation/Reading.pm @@ -1,6 +1,7 @@ package Text::Tradition::Collation::Reading; use Moose; +use Moose::Util::TypeConstraints; use MooseX::NonMoose; extends 'Graph::Easy::Node'; @@ -11,26 +12,31 @@ subtype 'Position' message { 'Position must be of the form x,y' }; has 'position' => ( - is => 'rw', - isa => 'Position', - ); + is => 'rw', + isa => 'Position', + ); # This contains an array of reading objects; the array is a pool, # shared by the reading objects inside the pool. When a reading is # added to the pool, all the same_as attributes should be updated. has 'same_as' => ( - is => 'rw', - isa => 'ArrayRef[Text::Tradition::Collation::Reading]', - default => [ $self ], - ); + is => 'rw', + isa => 'ArrayRef[Text::Tradition::Collation::Reading]', + ); # This is a hash mapping of 'relationship => reading'. # TODO we should validate the relationships sometime. -has 'equivalence' => ( - is => 'ro', - isa => 'HashRef[Text::Tradition::Collation::Reading]', - default => {}, - ); +has 'relationships' => ( + is => 'ro', + isa => 'HashRef[Text::Tradition::Collation::Reading]', + default => sub { {} }, + ); + +# Initialize the identity pool. +sub BUILD { + my( $self, $args ) = @_; +# $self->same_as( [ $self ] ); +} sub merge_from { my( $self, $merged_node ) = @_; @@ -39,19 +45,19 @@ sub merge_from { my $new_pool = _merge_array_pool( \@now_identical, $self->same_as ) if @now_identical; - # Adopt the equivalence attributes of the other node. - my $now_equiv = $merged_node->equivalence; - foreach my $key ( %$now_equiv ) { + # Adopt the relationship attributes of the other node. + my $now_rel = $merged_node->relationships; + foreach my $key ( %$now_rel ) { if( $self->has_relationship( $key ) ) { my $related = $self->get_relationship( $key ); - if( $now_equiv->{$key} ne $related ) { + if( $now_rel->{$key} ne $related ) { warn( sprintf( "Merged reading %s has relationship %s to reading %s instead of %s; skipping", $merged_node->name, $key, - $now_equiv->{$key}, + $now_rel->{$key}, $related) ); } # else no action needed } else { - $self->set_relationship( $key, $now_equiv->{$key} ); + $self->set_relationship( $key, $now_rel->{$key} ); } } } @@ -80,3 +86,48 @@ sub _merge_array_pool { } return $main_pool; } + +# Much easier to do this with a hash than with an array of Relationship objects, +# which would be the proper OO method. + +sub has_relationship { + my( $self, $rel ) = @_; + return exists( $self->relationships->{ $rel } ); +} + +sub get_relationship { + my( $self, $rel ) = @_; + if( $self->has_relationship( $rel ) ) { + return $self->relationships->{ $rel }; + } + return undef; +} + +sub set_relationship { + my( $self, $rel, $value ) = @_; + $self->relationships->{ $rel } = $value; +} + +no Moose; +__PACKAGE__->meta->make_immutable; + +1; + +###################################################### +## copied from Graph::Easy::Parser docs +###################################################### +# when overriding nodes, we also need ::Anon + +package Text::Tradition::Collation::Reading::Anon; + +use base qw/Text::Tradition::Collation::Reading/; +use base qw/Graph::Easy::Node::Anon/; + +###################################################### +# and :::Empty + +package Text::Tradition::Collation::Reading::Empty; + +use base qw/Text::Tradition::Collation::Reading/; + +###################################################### diff --git a/lib/Text/Tradition/Collation/Relationship.pm b/lib/Text/Tradition/Collation/Relationship.pm new file mode 100644 index 0000000..85f872d --- /dev/null +++ b/lib/Text/Tradition/Collation/Relationship.pm @@ -0,0 +1,27 @@ +package Text::Tradition::Collation::Relationship; + +use Moose; +use Moose::Util::TypeConstraints; + +enum 'RelationshipType' => qw( spelling orthographic grammatical ); + +has 'sort' => ( + is => 'rw', + isa => 'RelationshipType', + required => 1, +); + +has 'reading' => ( + is => 'rw', + isa => 'Text::Tradition::Collation::Reading', + required => 1, +); + +has 'global' => ( + is => 'rw', + isa => 'Bool', + default => 1, +); + + no Moose; + __PACKAGE__->meta->make_immutable; diff --git a/lib/Text/Tradition/Witness.pm b/lib/Text/Tradition/Witness.pm index 4215f87..90db6ca 100644 --- a/lib/Text/Tradition/Witness.pm +++ b/lib/Text/Tradition/Witness.pm @@ -3,29 +3,31 @@ use Moose; # Sigil. Required identifier for a witness. has 'sigil' => ( - is => 'ro', - isa => 'Str', - ); + is => 'ro', + isa => 'Str', + required => 1, + ); -# Text. This might be an array of strings, but it might also be an -# array of graph nodes. +# Text. This is an array of strings (i.e. word tokens). +# TODO Think about how to handle this for the case of pre-prepared +# collations, where the tokens are in the graph already. has 'text' => ( - is => 'rw', - isa => 'Array', - ); + is => 'rw', + isa => 'ArrayRef[Str]', + ); -# File. This is where we read in the witness, if not from a -# pre-prepared collation. -has 'file' => ( - is => 'ro', - isa => 'Str', - ); +# Source. This is where we read in the witness, if not from a +# pre-prepared collation. It is probably a filename. +has 'source' => ( + is => 'ro', + isa => 'Str', + ); sub BUILD { my $self = shift; - if( $self->has_file ) { + if( $self->has_source ) { # Read the file and initialize the text. - open( WITNESS, $self->file ) or die "Could not open " + open( WITNESS, $self->source ) or die "Could not open " . $self->file . "for reading"; # TODO support TEI as well as plaintext, sometime my @words; @@ -34,7 +36,7 @@ sub BUILD { push( @words, split( /\s+/, $_ ) ); } close WITNESS; - $self->text( @words ); + $self->text( \@words ); } }