X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition.pm;h=9a0f8b508e273c3357fd0cd40f9cf010e6f559b8;hb=10943ab0b79fbd489f6beb3b81a13ed8cbcfafcf;hp=a156c0dca5df8cc727f0ccc25d722989d417d7ad;hpb=d047cd5205779949e2d2c2d6f0e99077c5dc9c94;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition.pm b/lib/Text/Tradition.pm index a156c0d..9a0f8b5 100644 --- a/lib/Text/Tradition.pm +++ b/lib/Text/Tradition.pm @@ -1,85 +1,398 @@ package Text::Tradition; -use Text::Tradition::Witness; -use Text::Tradition::Collation; +use JSON qw / from_json /; +use Module::Load; use Moose; +use Text::Tradition::Collation; +use Text::Tradition::Stemma; +use Text::Tradition::Witness; + +use vars qw( $VERSION ); +$VERSION = "0.5"; has 'collation' => ( - is => 'ro', - isa => 'Text::Tradition::Collation', - ); - -has 'witnesses' => ( - traits => ['Array'], - is => 'rw', - isa => 'ArrayRef[Text::Tradition::Witness]', - handles => { - all_options => 'elements', - add_option => 'push', - map_options => 'map', - option_count => 'count', - sorted_options => 'sort', - }, - ); - -around BUILDARGS => sub { + is => 'ro', + isa => 'Text::Tradition::Collation', + writer => '_save_collation', + ); + +has 'witness_hash' => ( + traits => ['Hash'], + isa => 'HashRef[Text::Tradition::Witness]', + handles => { + witness => 'get', + add_witness => 'set', + del_witness => 'delete', + has_witness => 'exists', + witnesses => 'values', + }, + default => sub { {} }, + ); + +has 'name' => ( + is => 'rw', + isa => 'Str', + default => 'Tradition', + ); + +has 'language' => ( + is => 'rw', + isa => 'Str', + predicate => 'has_language', + ); + +has 'stemmata' => ( + traits => ['Array'], + isa => 'ArrayRef[Text::Tradition::Stemma]', + handles => { + stemmata => 'elements', + _add_stemma => 'push', + stemma => 'get', + stemma_count => 'count', + clear_stemmata => 'clear', + }, + default => sub { [] }, + ); + +has 'initialized' => ( + is => 'ro', + isa => 'Bool', + default => undef, + writer => '_init_done', + ); + +# Create the witness before trying to add it +around 'add_witness' => sub { my $orig = shift; - my $class = shift; - - # Now @_ contains the original constructor args. Make a - # collation argument and a witnesses argument. - my %init_args = @_; - my %member_objects = ( 'collation' => undef, - 'witnesses' => [] ); - - if( exists $init_args{'witnesses'} ) { - # We got passed an uncollated list of witnesses. Make a - # witness object for each witness, and then send them to the - # collator. - my $autosigil = 0; - foreach my $wit ( %{$init_args{'witnesses'}} ) { - # Each item in the list is either a string or an arrayref. - # If it's a string, it is a filename; if it's an arrayref, - # it is a tuple of 'sigil, file'. Handle either case. - my $args; - if( ref( $wit ) eq 'ARRAY' ) { - $args = { 'sigil' => $wit->[0], - 'file' => $wit->[1] }; - } else { - $args = { 'sigil' => chr( $autosigil+65 ), - 'file' => $wit }; - $autosigil++; - } - push( @{$member_objects{'witnesses'}}, - Text::Tradition::Witness->new( $args ) ); - # Now how to collate these? - } - } else { - $member_objects{'collation'} = - Text::Tradition::Collation->new( %init_args ); - @{$member_objects{'witnesses'}} = - $member_objects{'collation'}->create_witnesses(); + my $self = shift; + # TODO allow add of a Witness object? + my %args = @_ == 1 ? %{$_[0]} : @_; + $args{'tradition'} = $self; + $args{'language'} = $self->language + if( $self->language && !exists $args{'language'} ); + my $new_wit = Text::Tradition::Witness->new( %args ); + $self->$orig( $new_wit->sigil => $new_wit ); + return $new_wit; +}; + +# Allow deletion of witness by object as well as by sigil +around 'del_witness' => sub { + my $orig = shift; + my $self = shift; + my @key_args; + foreach my $arg ( @_ ) { + push( @key_args, + ref( $arg ) eq 'Text::Tradition::Witness' ? $arg->sigil : $arg ); } + return $self->$orig( @key_args ); +}; - return $class->$orig( %member_objects ); +# Don't allow an empty hash value +around 'witness' => sub { + my( $orig, $self, $arg ) = @_; + return unless $self->has_witness( $arg ); + return $self->$orig( $arg ); }; -# The user will usually be instantiating a Tradition object, and -# examining its collation. The information about the tradition can -# come via several routes: -# - graphML from CollateX or elsewhere, standalone -# - TEI parallel segmentation -# - Leuven-style spreadsheet of variants, converted to CSV, plus base text -# - apparatus pulled from CTE, plus base text -# From this we should be able to get basic witness information. -# -# Alternatively the user can just give us the uncollated texts. Then -# instead of passing a collation, s/he is passing a set of witnesses -# from which we will generate a collation. Those witnesses can be in -# plaintext or in TEI with certain constraints adopted. - -# So the constructor for a tradition needs to take one of these infosets, -# and construct the collation and the witness objects. +=head1 NAME + +Text::Tradition - a software model for a set of collated texts + +=head1 SYNOPSIS + + use Text::Tradition; + my $t = Text::Tradition->new( + 'name' => 'this is a text', + 'input' => 'TEI', + 'file' => '/path/to/tei_parallel_seg_file.xml' ); + + my @text_wits = $t->witnesses(); + my $manuscript_a = $t->witness( 'A' ); + + $t = Text::Tradition->new(); + $t->add_witness( 'sourcetype' => 'xmldesc', + 'file' => '/path/to/teitranscription.xml' ); + $t->add_witness( 'sourcetype => 'plaintext', 'sigil' => 'Q', + 'string' => 'The quick brown fox jumped over the lazy dogs' ); + ## TODO + $t->collate_texts; + + my $text_path_svg = $t->collation->as_svg(); + ## See Text::Tradition::Collation for more on text collation itself + +=head1 DESCRIPTION + +Text::Tradition is a library for representation and analysis of collated +texts, particularly medieval ones. A 'tradition' refers to the aggregation +of surviving versions of a text, generally preserved in multiple +manuscripts (or 'witnesses'). A Tradition object thus has one more more +Witnesses, as well as a Collation that represents the unity of all versions +of the text. + +=head1 METHODS + +=head2 new + +Creates and returns a new text tradition object. The following options are +accepted. + +General options: + +=over 4 + +=item B - The name of the text. + +=back + +Initialization based on a collation file: + +=over 4 + +=item B - The input format of the collation file. Can be one of the +following: + +=over 4 + +=item * Self - a GraphML format produced by this module + +=item * CollateX - a GraphML format produced by CollateX + +=item * CTE - a TEI XML format produced by Classical Text Editor + +=item * JSON - an alignment table in JSON format, as produced by CollateX and other tools + +=item * KUL - a specific CSV format for variants, not documented here + +=item * TEI - a TEI parallel segmentation format file + +=item * Tabular - a comma- or tab-separated collation. Takes an additional +option, 'sep_char', which defaults to the tab character. + +=back + +=item B - The name of the file that contains the data. One of 'file' +or 'string' should be specified. + +=item B - A text string that contains the data. One of 'file' or +'string' should be specified. + +=item B - The name of a text file that contains the base text, to be +used with input formats that require it (currently only KUL). + +=back + +Initialization based on a list of witnesses [NOT YET IMPLEMENTED]: + +=over 4 + +=item B - A reference to an array of Text::Tradition::Witness +objects that carry the text to be collated. + +=item B - A reference to a collation program that will accept +Witness objects. + +=back + +=head2 B + +Return the Text::Tradition::Witness objects associated with this tradition, +as an array. + +=head2 B( $sigil ) + +Returns the Text::Tradition::Witness object whose sigil is $sigil, or undef +if there is no such object within the tradition. + +=head2 B( %opts ) + +Instantiate a new witness with the given options (see documentation for +Text::Tradition::Witness) and add it to the tradition. + +=head2 B( $sigil ) + +Delete the witness with the given sigil from the tradition. Returns the +witness object for the deleted witness. + +=begin testing + +use_ok( 'Text::Tradition', "can use module" ); + +my $t = Text::Tradition->new( 'name' => 'empty' ); +is( ref( $t ), 'Text::Tradition', "initialized an empty Tradition object" ); +is( $t->name, 'empty', "object has the right name" ); +is( scalar $t->witnesses, 0, "object has no witnesses" ); + +my $simple = 't/data/simple.txt'; +my $s = Text::Tradition->new( + 'name' => 'inline', + 'input' => 'Tabular', + 'file' => $simple, + ); +is( ref( $s ), 'Text::Tradition', "initialized a Tradition object" ); +is( $s->name, 'inline', "object has the right name" ); +is( scalar $s->witnesses, 3, "object has three witnesses" ); + +my $wit_a = $s->witness('A'); +is( ref( $wit_a ), 'Text::Tradition::Witness', "Found a witness A" ); +if( $wit_a ) { + is( $wit_a->sigil, 'A', "Witness A has the right sigil" ); +} +is( $s->witness('X'), undef, "There is no witness X" ); +ok( !exists $s->{'witnesses'}->{'X'}, "Witness key X not created" ); + +my $wit_d = $s->add_witness( 'sigil' => 'D', 'sourcetype' => 'collation' ); +is( ref( $wit_d ), 'Text::Tradition::Witness', "new witness created" ); +is( $wit_d->sigil, 'D', "witness has correct sigil" ); +is( scalar $s->witnesses, 4, "object now has four witnesses" ); + +my $del = $s->del_witness( 'D' ); +is( $del, $wit_d, "Deleted correct witness" ); +is( scalar $s->witnesses, 3, "object has three witnesses again" ); + +# TODO test initialization by witness list when we have it + +=end testing + +=cut + + +sub BUILD { + my( $self, $init_args ) = @_; + + # First, make a collation object. This will use only those arguments in + # init_args that apply to the collation. + my $collation = Text::Tradition::Collation->new( %$init_args, + 'tradition' => $self ); + $self->_save_collation( $collation ); + + if( exists $init_args->{'input'} ) { + # Call the appropriate parser on the given data + my @format_standalone = qw/ Self CollateText CollateX CTE JSON TEI Tabular /; + my @format_basetext = qw/ KUL /; + my $use_base; + my $format = $init_args->{'input'}; + if( $format && !( grep { $_ eq $format } @format_standalone ) + && !( grep { $_ eq $format } @format_basetext ) ) { + warn "Unrecognized input format $format; not parsing"; + return; + } + if( $format && grep { $_ eq $format } @format_basetext ) { + $use_base = 1; + if( !exists $init_args->{'base'} ) { + warn "Cannot make a collation from $format without a base text"; + return; + } + } + + # Now do the parsing. + if( $format ) { + if( $use_base ) { + $format = 'BaseText'; # Use the BaseText module for parsing, + # but retain the original input arg. + } + my $mod = "Text::Tradition::Parser::$format"; + load( $mod ); + $mod->can('parse')->( $self, $init_args ); + } + } + $self->_init_done( 1 ); + return $self; +} + +=head2 add_json_witnesses( $jsonstring, $options ) + +Adds a set of witnesses from a JSON array specification. This is a wrapper +to parse the JSON and call add_witness (with the specified $options) for +each element therein. + +=cut + +sub add_json_witnesses { + my( $self, $jsonstr, $extraopts ) = @_; + my $witarray = from_json( $jsonstr ); + foreach my $witspec ( @{$witarray->{witnesses}} ) { + my $opts = $extraopts || {}; + $opts->{'sourcetype'} = 'json'; + $opts->{'object'} = $witspec; + $self->add_witness( $opts ); + } +} + +=head2 add_stemma( $dotfile ) + +Initializes a Text::Tradition::Stemma object from the given dotfile, +and associates it with the tradition. + +=begin testing + +use Text::Tradition; + +my $t = Text::Tradition->new( + 'name' => 'simple test', + 'input' => 'Tabular', + 'file' => 't/data/simple.txt', + ); + +is( $t->stemma_count, 0, "No stemmas added yet" ); +my $s; +ok( $s = $t->add_stemma( dotfile => 't/data/simple.dot' ), "Added a simple stemma" ); +is( ref( $s ), 'Text::Tradition::Stemma', "Got a stemma object returned" ); +is( $t->stemma_count, 1, "Tradition claims to have a stemma" ); +is( $t->stemma(0), $s, "Tradition hands back the right stemma" ); + +=end testing + +=cut + +sub add_stemma { + my $self = shift; + my %opts = @_; + my $stemma_fh; + if( $opts{'dotfile'} ) { + open $stemma_fh, '<', $opts{'dotfile'} + or warn "Could not open file " . $opts{'dotfile'}; + } elsif( $opts{'dot'} ) { + my $str = $opts{'dot'}; + open $stemma_fh, '<', \$str; + } + # Assume utf-8 + binmode $stemma_fh, ':utf8'; + my $stemma = Text::Tradition::Stemma->new( + 'collation' => $self->collation, + 'dot' => $stemma_fh ); + $self->_add_stemma( $stemma ) if $stemma; + return $stemma; +} + +sub lemmatize { + my $self = shift; + unless( $self->has_language ) { + warn "Please set a language to lemmatize a tradition"; + return; + } + my $mod = "Text::Tradition::Language::" . $self->language; + load( $mod ); + $mod->can( 'lemmatize' )->( $self ); +} no Moose; __PACKAGE__->meta->make_immutable; + + +=head1 BUGS / TODO + +=over + +=item * Allow tradition to be initialized via passing to a collator. + +=back + +=head1 LICENSE + +This package is free software and is provided "as is" without express +or implied warranty. You can redistribute it and/or modify it under +the same terms as Perl itself. + +=head1 AUTHOR + +Tara L Andrews Eaurum@cpan.orgE