X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition.pm;h=9a0f8b508e273c3357fd0cd40f9cf010e6f559b8;hb=10943ab0b79fbd489f6beb3b81a13ed8cbcfafcf;hp=14b22533a2c50486d34d3a651fac36ad02662b9b;hpb=3b853983204d888a90c029c1e66d77b9fa9642b5;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition.pm b/lib/Text/Tradition.pm index 14b2253..9a0f8b5 100644 --- a/lib/Text/Tradition.pm +++ b/lib/Text/Tradition.pm @@ -1,12 +1,14 @@ package Text::Tradition; +use JSON qw / from_json /; use Module::Load; use Moose; use Text::Tradition::Collation; +use Text::Tradition::Stemma; use Text::Tradition::Witness; use vars qw( $VERSION ); -$VERSION = "0.1"; +$VERSION = "0.5"; has 'collation' => ( is => 'ro', @@ -32,13 +34,43 @@ has 'name' => ( isa => 'Str', default => 'Tradition', ); + +has 'language' => ( + is => 'rw', + isa => 'Str', + predicate => 'has_language', + ); + +has 'stemmata' => ( + traits => ['Array'], + isa => 'ArrayRef[Text::Tradition::Stemma]', + handles => { + stemmata => 'elements', + _add_stemma => 'push', + stemma => 'get', + stemma_count => 'count', + clear_stemmata => 'clear', + }, + default => sub { [] }, + ); +has 'initialized' => ( + is => 'ro', + isa => 'Bool', + default => undef, + writer => '_init_done', + ); + # Create the witness before trying to add it around 'add_witness' => sub { my $orig = shift; my $self = shift; # TODO allow add of a Witness object? - my $new_wit = Text::Tradition::Witness->new( @_ ); + my %args = @_ == 1 ? %{$_[0]} : @_; + $args{'tradition'} = $self; + $args{'language'} = $self->language + if( $self->language && !exists $args{'language'} ); + my $new_wit = Text::Tradition::Witness->new( %args ); $self->$orig( $new_wit->sigil => $new_wit ); return $new_wit; }; @@ -76,7 +108,14 @@ Text::Tradition - a software model for a set of collated texts my @text_wits = $t->witnesses(); my $manuscript_a = $t->witness( 'A' ); - my $new_ms = $t->add_witness( 'sigil' => 'B' ); + + $t = Text::Tradition->new(); + $t->add_witness( 'sourcetype' => 'xmldesc', + 'file' => '/path/to/teitranscription.xml' ); + $t->add_witness( 'sourcetype => 'plaintext', 'sigil' => 'Q', + 'string' => 'The quick brown fox jumped over the lazy dogs' ); + ## TODO + $t->collate_texts; my $text_path_svg = $t->collation->as_svg(); ## See Text::Tradition::Collation for more on text collation itself @@ -120,6 +159,8 @@ following: =item * CTE - a TEI XML format produced by Classical Text Editor +=item * JSON - an alignment table in JSON format, as produced by CollateX and other tools + =item * KUL - a specific CSV format for variants, not documented here =item * TEI - a TEI parallel segmentation format file @@ -157,11 +198,21 @@ Witness objects. Return the Text::Tradition::Witness objects associated with this tradition, as an array. +=head2 B( $sigil ) + +Returns the Text::Tradition::Witness object whose sigil is $sigil, or undef +if there is no such object within the tradition. + =head2 B( %opts ) Instantiate a new witness with the given options (see documentation for Text::Tradition::Witness) and add it to the tradition. +=head2 B( $sigil ) + +Delete the witness with the given sigil from the tradition. Returns the +witness object for the deleted witness. + =begin testing use_ok( 'Text::Tradition', "can use module" ); @@ -181,13 +232,21 @@ is( ref( $s ), 'Text::Tradition', "initialized a Tradition object" ); is( $s->name, 'inline', "object has the right name" ); is( scalar $s->witnesses, 3, "object has three witnesses" ); -my $w = $s->add_witness( 'sigil' => 'D' ); -is( ref( $w ), 'Text::Tradition::Witness', "new witness created" ); -is( $w->sigil, 'D', "witness has correct sigil" ); +my $wit_a = $s->witness('A'); +is( ref( $wit_a ), 'Text::Tradition::Witness', "Found a witness A" ); +if( $wit_a ) { + is( $wit_a->sigil, 'A', "Witness A has the right sigil" ); +} +is( $s->witness('X'), undef, "There is no witness X" ); +ok( !exists $s->{'witnesses'}->{'X'}, "Witness key X not created" ); + +my $wit_d = $s->add_witness( 'sigil' => 'D', 'sourcetype' => 'collation' ); +is( ref( $wit_d ), 'Text::Tradition::Witness', "new witness created" ); +is( $wit_d->sigil, 'D', "witness has correct sigil" ); is( scalar $s->witnesses, 4, "object now has four witnesses" ); my $del = $s->del_witness( 'D' ); -is( $del, $w, "Deleted correct witness" ); +is( $del, $wit_d, "Deleted correct witness" ); is( scalar $s->witnesses, 3, "object has three witnesses again" ); # TODO test initialization by witness list when we have it @@ -199,36 +258,16 @@ is( scalar $s->witnesses, 3, "object has three witnesses again" ); sub BUILD { my( $self, $init_args ) = @_; + + # First, make a collation object. This will use only those arguments in + # init_args that apply to the collation. + my $collation = Text::Tradition::Collation->new( %$init_args, + 'tradition' => $self ); + $self->_save_collation( $collation ); - if( exists $init_args->{'witnesses'} ) { - # We got passed an uncollated list of witnesses. Make a - # witness object for each witness, and then send them to the - # collator. - my $autosigil = 0; - foreach my $wit ( %{$init_args->{'witnesses'}} ) { - # Each item in the list is either a string or an arrayref. - # If it's a string, it is a filename; if it's an arrayref, - # it is a tuple of 'sigil, file'. Handle either case. - my $args; - if( ref( $wit ) eq 'ARRAY' ) { - $args = { 'sigil' => $wit->[0], - 'file' => $wit->[1] }; - } else { - $args = { 'sigil' => chr( $autosigil+65 ), - 'file' => $wit }; - $autosigil++; - } - $self->witnesses->add_witness( $args ); - # TODO Now how to collate these? - } - } else { - # Else we need to parse some collation data. Make a Collation object - my $collation = Text::Tradition::Collation->new( %$init_args, - 'tradition' => $self ); - $self->_save_collation( $collation ); - + if( exists $init_args->{'input'} ) { # Call the appropriate parser on the given data - my @format_standalone = qw/ Self CollateX CTE TEI Tabular /; + my @format_standalone = qw/ Self CollateText CollateX CTE JSON TEI Tabular /; my @format_basetext = qw/ KUL /; my $use_base; my $format = $init_args->{'input'}; @@ -256,34 +295,86 @@ sub BUILD { $mod->can('parse')->( $self, $init_args ); } } + $self->_init_done( 1 ); + return $self; } -=head2 B( $sigil ) +=head2 add_json_witnesses( $jsonstring, $options ) -Returns the Text::Tradition::Witness object whose sigil is $sigil, or undef -if there is no such object within the tradition. +Adds a set of witnesses from a JSON array specification. This is a wrapper +to parse the JSON and call add_witness (with the specified $options) for +each element therein. + +=cut + +sub add_json_witnesses { + my( $self, $jsonstr, $extraopts ) = @_; + my $witarray = from_json( $jsonstr ); + foreach my $witspec ( @{$witarray->{witnesses}} ) { + my $opts = $extraopts || {}; + $opts->{'sourcetype'} = 'json'; + $opts->{'object'} = $witspec; + $self->add_witness( $opts ); + } +} + +=head2 add_stemma( $dotfile ) + +Initializes a Text::Tradition::Stemma object from the given dotfile, +and associates it with the tradition. =begin testing use Text::Tradition; -my $simple = 't/data/simple.txt'; -my $s = Text::Tradition->new( - 'name' => 'inline', +my $t = Text::Tradition->new( + 'name' => 'simple test', 'input' => 'Tabular', - 'file' => $simple, + 'file' => 't/data/simple.txt', ); -my $wit_a = $s->witness('A'); -is( ref( $wit_a ), 'Text::Tradition::Witness', "Found a witness A" ); -if( $wit_a ) { - is( $wit_a->sigil, 'A', "Witness A has the right sigil" ); -} -is( $s->witness('X'), undef, "There is no witness X" ); + +is( $t->stemma_count, 0, "No stemmas added yet" ); +my $s; +ok( $s = $t->add_stemma( dotfile => 't/data/simple.dot' ), "Added a simple stemma" ); +is( ref( $s ), 'Text::Tradition::Stemma', "Got a stemma object returned" ); +is( $t->stemma_count, 1, "Tradition claims to have a stemma" ); +is( $t->stemma(0), $s, "Tradition hands back the right stemma" ); =end testing =cut +sub add_stemma { + my $self = shift; + my %opts = @_; + my $stemma_fh; + if( $opts{'dotfile'} ) { + open $stemma_fh, '<', $opts{'dotfile'} + or warn "Could not open file " . $opts{'dotfile'}; + } elsif( $opts{'dot'} ) { + my $str = $opts{'dot'}; + open $stemma_fh, '<', \$str; + } + # Assume utf-8 + binmode $stemma_fh, ':utf8'; + my $stemma = Text::Tradition::Stemma->new( + 'collation' => $self->collation, + 'dot' => $stemma_fh ); + $self->_add_stemma( $stemma ) if $stemma; + return $stemma; +} + +sub lemmatize { + my $self = shift; + unless( $self->has_language ) { + warn "Please set a language to lemmatize a tradition"; + return; + } + my $mod = "Text::Tradition::Language::" . $self->language; + load( $mod ); + $mod->can( 'lemmatize' )->( $self ); +} + no Moose; __PACKAGE__->meta->make_immutable;