1 package Text::Tradition::Parser::Tabular;
9 Text::Tradition::Parser::Tabular
15 my $t_from_file = Text::Tradition->new(
18 'file' => '/path/to/collation.csv',
22 my $t_from_string = Text::Tradition->new(
25 'string' => $tab_separated_collation,
31 Parser module for Text::Tradition to read an alignment table format, such as CSV.
35 =head2 B<parse>( $tradition, $option_hash )
37 Takes an initialized tradition and a set of options; creates the
38 appropriate nodes and edges on the graph, as well as the appropriate
39 witness objects. The $option_hash must contain either a 'file' or a
40 'string' argument with the table to be parsed; it may also contain a
41 'sep_char' argument to specify how the fields are separated.
43 The table should have witnesses arranged in columns, with the witness sigla
44 in the first row. Empty cells are interpreted as omissions (and thus
45 stemmatologically relevant.) Longer lacunae in the text, to be disregarded
46 in cladistic analysis, may be represented by filling the appropriate cells
47 with the tag '#LACUNA#'.
49 If a witness name ends in the collation's ac_label, it will be treated as
50 an 'ante-correction' version of the 'main' witness whose sigil it shares.
55 binmode STDOUT, ":utf8";
56 binmode STDERR, ":utf8";
57 eval { no warnings; binmode $DB::OUT, ":utf8"; };
59 my $csv = 't/data/florilegium.csv';
60 my $t = Text::Tradition->new(
67 is( ref( $t ), 'Text::Tradition', "Parsed florilegium CSV file" );
69 ### TODO Check these figures
71 is( scalar $t->collation->readings, 311, "Collation has all readings" );
72 is( scalar $t->collation->paths, 361, "Collation has all paths" );
73 is( scalar $t->witnesses, 13, "Collation has all witnesses" );
81 my( $tradition, $opts ) = @_;
82 my $c = $tradition->collation; # shorthand
83 my $csv = Text::CSV_XS->new( {
84 binary => 1, # binary for UTF-8
85 sep_char => exists $opts->{'sep_char'} ? $opts->{'sep_char'} : "\t" }
89 if( exists $opts->{'string' } ) {
90 my @lines = split( "\n", $opts->{'string'} );
91 foreach my $l ( @lines ) {
92 my $status = $csv->parse( $l );
94 push( @$alignment_table, [ $csv->fields ] );
96 warn "Could not parse line $l: " . $csv->error_input;
99 } elsif( exists $opts->{'file'} ) {
100 open( my $fh, $opts->{'file'} )
101 or warn "Could not open input file " . $opts->{'file'};
102 binmode( $fh, ':utf8' );
103 while( my $row = $csv->getline( $fh ) ) {
104 push( @$alignment_table, $row );
108 warn "Could not find string or file option to parse";
112 # Set up the witnesses we find in the first line
114 my %ac_wits; # Track these for later removal
115 foreach my $sigil ( @{$alignment_table->[0]} ) {
116 my $wit = $tradition->add_witness( 'sigil' => $sigil );
117 $wit->path( [ $c->start ] );
118 push( @witnesses, $wit );
119 my $aclabel = $c->ac_label;
120 if( $sigil =~ /^(.*)\Q$aclabel\E$/ ) {
125 # Now for the next rows, make nodes as necessary, assign their ranks, and
126 # add them to the witness paths.
127 foreach my $idx ( 1 .. $#{$alignment_table} ) {
128 my $row = $alignment_table->[$idx];
129 my $nodes = make_nodes( $c, $row, $idx );
130 foreach my $w ( 0 .. $#{$row} ) {
131 # push the appropriate node onto the appropriate witness path
132 my $word = $row->[$w];
134 my $reading = $nodes->{$word};
135 my $wit = $witnesses[$w];
136 push( @{$wit->path}, $reading );
137 } # else skip it for empty readings.
141 # Collapse our lacunae into a single node and
142 # push the end node onto all paths.
143 $c->end->rank( scalar @$alignment_table );
144 foreach my $wit ( @witnesses ) {
146 my $last_rdg = shift @$p;
147 my $new_p = [ $last_rdg ];
148 foreach my $rdg ( @$p ) {
149 # Omit the reading if we are in a lacuna already.
150 next if $rdg->is_lacuna && $last_rdg->is_lacuna;
151 # Save the reading otherwise.
152 push( @$new_p, $rdg );
155 push( @$new_p, $c->end );
156 $wit->path( $new_p );
159 # Fold any a.c. witnesses into their main witness objects, and
160 # delete the independent a.c. versions.
161 foreach my $a ( keys %ac_wits ) {
162 my $main_wit = $tradition->witness( $a );
163 next unless $main_wit;
164 my $ac_wit = $ac_wits{$a};
165 $main_wit->uncorrected_path( $ac_wit->path );
166 $tradition->del_witness( $ac_wit );
170 $c->make_witness_paths;
171 # Delete our unused lacuna nodes.
172 foreach my $rdg ( grep { $_->is_lacuna } $c->readings ) {
173 $c->del_reading( $rdg ) unless $c->reading_witnesses( $rdg );
178 my( $collation, $row, $index ) = @_;
180 foreach my $w ( @$row ) {
181 $unique{$w} = 1 if $w;
184 foreach my $w ( keys %unique ) {
186 'id' => "$index,$ctr",
190 $rargs->{'is_lacuna'} = 1 if $w eq '#LACUNA#';
191 my $r = $collation->add_reading( $rargs );
202 This package is free software and is provided "as is" without express
203 or implied warranty. You can redistribute it and/or modify it under
204 the same terms as Perl itself.
208 Tara L Andrews E<lt>aurum@cpan.orgE<gt>