add support for alignment table input
[scpubgit/stemmatology.git] / lib / Text / Tradition / Parser / Tabular.pm
CommitLineData
d9e873d0 1package Text::Tradition::Parser::Tabular;
2
3use strict;
4use warnings;
5use Text::CSV_XS;
6
7=head1 NAME
8
9Text::Tradition::Parser::Tabular
10
11=head1 DESCRIPTION
12
13Parser module for Text::Tradition to read an alignment table format, such as CSV.
14
15=head1 METHODS
16
17=over
18
19=item B<parse>
20
21parse( $graph, $graphml_string );
22
23Takes an initialized Text::Tradition::Graph object and a string
24containing the GraphML; creates the appropriate nodes and edges on the
25graph.
26
27=cut
28
29sub parse {
30 my( $tradition, $tab_str ) = @_;
31 # TODO Allow setting of sep_char
32 my $c = $tradition->collation; # shorthand
33 my $csv = Text::CSV_XS->new( { binary => 1 } ); # binary for UTF-8
34 my @lines = split( "\n", $tab_str );
35 # Conveniently, we are basically receiving exactly the sort of alignment table
36 # we might want to produce later. May as well save it.
37 my $alignment_table;
38 foreach my $l ( @lines ) {
39 my $status = $csv->parse( $l );
40 if( $status ) {
41 push( @$alignment_table, [ $csv->fields ] );
42 } else {
43 warn "Could not parse line $l: " . $csv->error_input;
44 }
45 }
46
47 # Set up the witnesses we find in the first line
48 my @witnesses;
49 foreach my $sigil ( @{$alignment_table->[0]} ) {
50 my $wit = $tradition->add_witness( 'sigil' => $sigil );
51 $wit->path( [ $c->start ] );
52 push( @witnesses, $wit );
53 }
54
55 # Now for the next rows, make nodes as necessary, assign their ranks, and
56 # add them to the witness paths.
57 $DB::single = 1;
58 foreach my $idx ( 1 .. $#{$alignment_table} ) {
59 my $row = $alignment_table->[$idx];
60 my $nodes = make_nodes( $c, $row, $idx );
61 foreach my $w ( 0 .. $#{$row} ) {
62 # push the appropriate node onto the appropriate witness path
63 my $word = $row->[$w];
64 if( $word ) {
65 my $reading = $nodes->{$word};
66 my $wit = $witnesses[$w];
67 push( @{$wit->path}, $reading );
68 } # else skip it for empty readings.
69 }
70 }
71
72 # Push the end node onto all paths.
73 $c->end->rank( scalar @$alignment_table );
74 foreach my $wit ( @witnesses ) {
75 push( @{$wit->path}, $c->end );
76 }
77
78 # Join up the paths.
79 $c->make_witness_paths;
80
81 # Save the alignment table that was so handily provided to us.
82 # TODO if we support other delimiters, we will have to re-export this
83 # rather than saving the original string.
84 $c->_save_csv( $tab_str );
85}
86
87sub make_nodes {
88 my( $collation, $row, $index ) = @_;
89 my %unique;
90 foreach my $w ( @$row ) {
91 $unique{$w} = 1 if $w;
92 }
93 my $ctr = 1;
94 foreach my $w ( keys %unique ) {
95 my $r = $collation->add_reading( "$index,$ctr" );
96 $ctr++;
97 $r->rank( $index );
98 $r->text( $w );
99 $unique{$w} = $r;
100 }
101 return \%unique;
102}
103
1041;