Commit | Line | Data |
d9e873d0 |
1 | package Text::Tradition::Parser::Tabular; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Text::CSV_XS; |
6 | |
7 | =head1 NAME |
8 | |
9 | Text::Tradition::Parser::Tabular |
10 | |
11 | =head1 DESCRIPTION |
12 | |
13 | Parser module for Text::Tradition to read an alignment table format, such as CSV. |
14 | |
15 | =head1 METHODS |
16 | |
17 | =over |
18 | |
19 | =item B<parse> |
20 | |
21 | parse( $graph, $graphml_string ); |
22 | |
23 | Takes an initialized Text::Tradition::Graph object and a string |
24 | containing the GraphML; creates the appropriate nodes and edges on the |
25 | graph. |
26 | |
27 | =cut |
28 | |
29 | sub parse { |
30 | my( $tradition, $tab_str ) = @_; |
31 | # TODO Allow setting of sep_char |
32 | my $c = $tradition->collation; # shorthand |
33 | my $csv = Text::CSV_XS->new( { binary => 1 } ); # binary for UTF-8 |
34 | my @lines = split( "\n", $tab_str ); |
35 | # Conveniently, we are basically receiving exactly the sort of alignment table |
36 | # we might want to produce later. May as well save it. |
37 | my $alignment_table; |
38 | foreach my $l ( @lines ) { |
39 | my $status = $csv->parse( $l ); |
40 | if( $status ) { |
41 | push( @$alignment_table, [ $csv->fields ] ); |
42 | } else { |
43 | warn "Could not parse line $l: " . $csv->error_input; |
44 | } |
45 | } |
46 | |
47 | # Set up the witnesses we find in the first line |
48 | my @witnesses; |
49 | foreach my $sigil ( @{$alignment_table->[0]} ) { |
50 | my $wit = $tradition->add_witness( 'sigil' => $sigil ); |
51 | $wit->path( [ $c->start ] ); |
52 | push( @witnesses, $wit ); |
53 | } |
54 | |
55 | # Now for the next rows, make nodes as necessary, assign their ranks, and |
56 | # add them to the witness paths. |
57 | $DB::single = 1; |
58 | foreach my $idx ( 1 .. $#{$alignment_table} ) { |
59 | my $row = $alignment_table->[$idx]; |
60 | my $nodes = make_nodes( $c, $row, $idx ); |
61 | foreach my $w ( 0 .. $#{$row} ) { |
62 | # push the appropriate node onto the appropriate witness path |
63 | my $word = $row->[$w]; |
64 | if( $word ) { |
65 | my $reading = $nodes->{$word}; |
66 | my $wit = $witnesses[$w]; |
67 | push( @{$wit->path}, $reading ); |
68 | } # else skip it for empty readings. |
69 | } |
70 | } |
71 | |
72 | # Push the end node onto all paths. |
73 | $c->end->rank( scalar @$alignment_table ); |
74 | foreach my $wit ( @witnesses ) { |
75 | push( @{$wit->path}, $c->end ); |
76 | } |
77 | |
78 | # Join up the paths. |
79 | $c->make_witness_paths; |
80 | |
81 | # Save the alignment table that was so handily provided to us. |
82 | # TODO if we support other delimiters, we will have to re-export this |
83 | # rather than saving the original string. |
84 | $c->_save_csv( $tab_str ); |
85 | } |
86 | |
87 | sub make_nodes { |
88 | my( $collation, $row, $index ) = @_; |
89 | my %unique; |
90 | foreach my $w ( @$row ) { |
91 | $unique{$w} = 1 if $w; |
92 | } |
93 | my $ctr = 1; |
94 | foreach my $w ( keys %unique ) { |
95 | my $r = $collation->add_reading( "$index,$ctr" ); |
96 | $ctr++; |
97 | $r->rank( $index ); |
98 | $r->text( $w ); |
99 | $unique{$w} = $r; |
100 | } |
101 | return \%unique; |
102 | } |
103 | |
104 | 1; |