Commit | Line | Data |
d9e873d0 |
1 | package Text::Tradition::Parser::Tabular; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Text::CSV_XS; |
6 | |
7 | =head1 NAME |
8 | |
9 | Text::Tradition::Parser::Tabular |
10 | |
11 | =head1 DESCRIPTION |
12 | |
13 | Parser module for Text::Tradition to read an alignment table format, such as CSV. |
14 | |
15 | =head1 METHODS |
16 | |
17 | =over |
18 | |
19 | =item B<parse> |
20 | |
21 | parse( $graph, $graphml_string ); |
22 | |
23 | Takes an initialized Text::Tradition::Graph object and a string |
24 | containing the GraphML; creates the appropriate nodes and edges on the |
25 | graph. |
26 | |
27 | =cut |
28 | |
29 | sub parse { |
30 | my( $tradition, $tab_str ) = @_; |
31 | # TODO Allow setting of sep_char |
32 | my $c = $tradition->collation; # shorthand |
eca16057 |
33 | my $csv = Text::CSV_XS->new( { binary => 1, # binary for UTF-8 |
34 | sep_char => "\t" } ); |
d9e873d0 |
35 | my @lines = split( "\n", $tab_str ); |
d9e873d0 |
36 | my $alignment_table; |
37 | foreach my $l ( @lines ) { |
38 | my $status = $csv->parse( $l ); |
39 | if( $status ) { |
40 | push( @$alignment_table, [ $csv->fields ] ); |
41 | } else { |
42 | warn "Could not parse line $l: " . $csv->error_input; |
43 | } |
44 | } |
45 | |
46 | # Set up the witnesses we find in the first line |
47 | my @witnesses; |
48 | foreach my $sigil ( @{$alignment_table->[0]} ) { |
49 | my $wit = $tradition->add_witness( 'sigil' => $sigil ); |
50 | $wit->path( [ $c->start ] ); |
51 | push( @witnesses, $wit ); |
52 | } |
53 | |
54 | # Now for the next rows, make nodes as necessary, assign their ranks, and |
55 | # add them to the witness paths. |
d9e873d0 |
56 | foreach my $idx ( 1 .. $#{$alignment_table} ) { |
57 | my $row = $alignment_table->[$idx]; |
58 | my $nodes = make_nodes( $c, $row, $idx ); |
59 | foreach my $w ( 0 .. $#{$row} ) { |
60 | # push the appropriate node onto the appropriate witness path |
61 | my $word = $row->[$w]; |
62 | if( $word ) { |
63 | my $reading = $nodes->{$word}; |
64 | my $wit = $witnesses[$w]; |
65 | push( @{$wit->path}, $reading ); |
66 | } # else skip it for empty readings. |
67 | } |
68 | } |
69 | |
eca16057 |
70 | |
71 | # Collapse our lacunae into a single node and |
72 | # push the end node onto all paths. |
d9e873d0 |
73 | $c->end->rank( scalar @$alignment_table ); |
74 | foreach my $wit ( @witnesses ) { |
eca16057 |
75 | my $p = $wit->path; |
76 | my $last_rdg = shift @$p; |
77 | my $new_p = [ $last_rdg ]; |
78 | foreach my $rdg ( @$p ) { |
79 | if( $rdg->text eq '#LACUNA#' ) { |
80 | # If we are in a lacuna already, drop this node. |
81 | # Otherwise make a lacuna node and drop this node. |
82 | unless( $last_rdg->is_lacuna ) { |
83 | my $l = $c->add_lacuna( $rdg->name ); |
84 | $l->rank( $rdg->rank ); |
85 | push( @$new_p, $l ); |
86 | $last_rdg = $l; |
87 | } |
88 | $c->del_reading( $rdg ); |
89 | } else { |
90 | # No lacuna, save the reading. |
91 | push( @$new_p, $rdg ); |
92 | } |
93 | } |
94 | push( @$new_p, $c->end ); |
95 | $wit->path( $new_p ); |
d9e873d0 |
96 | } |
97 | |
98 | # Join up the paths. |
99 | $c->make_witness_paths; |
d9e873d0 |
100 | } |
101 | |
102 | sub make_nodes { |
103 | my( $collation, $row, $index ) = @_; |
104 | my %unique; |
105 | foreach my $w ( @$row ) { |
106 | $unique{$w} = 1 if $w; |
107 | } |
108 | my $ctr = 1; |
109 | foreach my $w ( keys %unique ) { |
110 | my $r = $collation->add_reading( "$index,$ctr" ); |
111 | $ctr++; |
112 | $r->rank( $index ); |
113 | $r->text( $w ); |
114 | $unique{$w} = $r; |
115 | } |
116 | return \%unique; |
117 | } |
118 | |
119 | 1; |