allow either file or string to be passed for parsing
[scpubgit/stemmatology.git] / lib / Text / Tradition.pm
CommitLineData
dd3b58b0 1package Text::Tradition;
2
4a8828f0 3use Module::Load;
dd3b58b0 4use Moose;
8e1394aa 5use Text::Tradition::Collation;
6use Text::Tradition::Witness;
dd3b58b0 7
# The collation belonging to this tradition.  There is a public reader
# only; the value is stored through the private writer _save_collation.
has collation => (
    isa    => 'Text::Tradition::Collation',
    is     => 'ro',
    writer => '_save_collation',
);
dd3b58b0 13
# The witness objects of this tradition, kept in an array reference that
# starts out empty.  Access goes through the native Array delegations:
#   witnesses   - returns the stored witnesses as a list ('elements')
#   add_witness - appends to the list ('push')
has witnesses => (
    isa     => 'ArrayRef[Text::Tradition::Witness]',
    traits  => ['Array'],
    default => sub { return [] },
    handles => {
        add_witness => 'push',
        witnesses   => 'elements',
    },
);
c5104dc0 23
# Name of the tradition; a read-write string that defaults to 'Tradition'.
has name => (
    isa     => 'Str',
    is      => 'rw',
    default => 'Tradition',
);
910a0a6d 29
30around 'add_witness' => sub {
31 my $orig = shift;
32 my $self = shift;
33 my $new_wit = Text::Tradition::Witness->new( @_ );
34 $self->$orig( $new_wit );
35 return $new_wit;
36};
37
df6d9812 38
# Post-construction hook.  $init_args is the hash reference of arguments
# that was given to the constructor.  Two modes are supported:
#
#  * 'witnesses' present: the value is a list (array reference) of
#    uncollated witnesses.  Each item is either a filename or a
#    [ sigil, filename ] tuple, and one witness is added per item.
#    Items without an explicit sigil get 'A', 'B', ... in order.
#
#  * otherwise: a Text::Tradition::Collation is created and saved, and
#    if 'input' names a recognized format, the matching
#    Text::Tradition::Parser::* module is loaded and its parse() is
#    called with ( $self, $init_args ).  Formats that need a base text
#    require a 'base' argument and are parsed by the BaseText module.
#
# Problems with the input are reported with warn(), not die().
sub BUILD {
    my( $self, $init_args ) = @_;

    if( exists $init_args->{'witnesses'} ) {
        # We got passed an uncollated list of witnesses.  Make a
        # witness object for each witness, and then send them to the
        # collator.
        # NOTE(review): the 'witnesses' attribute is typed as
        # ArrayRef[Text::Tradition::Witness] and does not override its
        # init_arg, so Moose may reject a list of plain filenames before
        # BUILD is ever reached -- confirm and adjust the attribute.
        my $autosigil = 0;
        # The argument is a list, so dereference it as an array (it was
        # previously dereferenced as a hash, which dies on an arrayref).
        foreach my $wit ( @{ $init_args->{'witnesses'} } ) {
            # Each item in the list is either a string or an arrayref.
            # If it's a string, it is a filename; if it's an arrayref,
            # it is a tuple of 'sigil, file'.  Handle either case.
            my $args;
            if( ref( $wit ) eq 'ARRAY' ) {
                $args = { 'sigil' => $wit->[0],
                          'file'  => $wit->[1] };
            } else {
                $args = { 'sigil' => chr( $autosigil+65 ),
                          'file'  => $wit };
                $autosigil++;
            }
            # add_witness is a method of the tradition itself; the
            # 'witnesses' accessor only returns the list of elements.
            $self->add_witness( $args );
            # TODO Now how to collate these?
        }
        return;
    }

    # Else we need to parse some collation data.  Make a Collation object
    my $collation = Text::Tradition::Collation->new( %$init_args,
                                                     'tradition' => $self );
    $self->_save_collation( $collation );

    # Call the appropriate parser on the given data
    my @format_standalone = qw/ Self CollateX CSV CTE TEI Tabular /;
    my @format_basetext = qw/ KUL /;
    my $format = $init_args->{'input'};
    unless( $format ) {
        warn "No data given to create a collation; will initialize an empty one";
        return;
    }

    my $is_standalone = grep { $_ eq $format } @format_standalone;
    my $use_base      = grep { $_ eq $format } @format_basetext;
    unless( $is_standalone || $use_base ) {
        warn "Unrecognized input format $format; not parsing";
        return;
    }
    if( $use_base && !exists $init_args->{'base'} ) {
        warn "Cannot make a collation from $format without a base text";
        return;
    }

    # Now do the parsing.
    if( $use_base ) {
        $format = 'BaseText';   # Use the BaseText module for parsing,
                                # but retain the original input arg.
    }
    my $mod = "Text::Tradition::Parser::$format";
    load( $mod );
    $mod->can('parse')->( $self, $init_args );
}
c5104dc0 102
# Look up a witness by its sigil.
#
# Walks the tradition's witnesses in order and returns the first one
# whose sigil is string-equal to $sigil.  Returns undef when there is no
# such witness; callers depend on that undef, so no warning is issued.
sub witness {
    my( $self, $sigil ) = @_;

    for my $candidate ( $self->witnesses ) {
        return $candidate if $candidate->sigil eq $sigil;
    }

    # We depend on an undef return value for no such witness.
    # warn "No such witness $sigil";
    return undef;
}
ecff899f 116
910a0a6d 117
4a8828f0 118
dd3b58b0 119# The user will usually be instantiating a Tradition object, and
120# examining its collation. The information about the tradition can
121# come via several routes:
122# - graphML from CollateX or elsewhere, standalone
123# - TEI parallel segmentation
124# - Leuven-style spreadsheet of variants, converted to CSV, plus base text
125# - apparatus pulled from CTE, plus base text
126# From this we should be able to get basic witness information.
127#
128# Alternatively the user can just give us the uncollated texts. Then
129# instead of passing a collation, s/he is passing a set of witnesses
130# from which we will generate a collation. Those witnesses can be in
131# plaintext or in TEI with certain constraints adopted.
132
133# So the constructor for a tradition needs to take one of these infosets,
134# and construct the collation and the witness objects.
135
# Unimport the Moose keywords from this package, then make the class
# immutable now that all attributes and method modifiers are declared.
no Moose;
__PACKAGE__->meta->make_immutable();