Commit | Line | Data |
d9e873d0 |
1 | package Text::Tradition::Parser::Tabular; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Text::CSV_XS; |
6 | |
7 | =head1 NAME |
8 | |
9 | Text::Tradition::Parser::Tabular |
10 | |
11 | =head1 DESCRIPTION |
12 | |
13 | Parser module for Text::Tradition to read an alignment table format, such as CSV. |
14 | |
15 | =head1 METHODS |
16 | |
17 | =over |
18 | |
19 | =item B<parse> |
20 | |
21 | parse( $graph, $graphml_string ); |
22 | |
23 | Takes an initialized Text::Tradition::Graph object and a string |
24 | containing the GraphML; creates the appropriate nodes and edges on the |
25 | graph. |
26 | |
27 | =cut |
28 | |
29 | sub parse { |
dfc37e38 |
30 | my( $tradition, $opts ) = @_; |
d9e873d0 |
31 | my $c = $tradition->collation; # shorthand |
dfc37e38 |
32 | my $csv = Text::CSV_XS->new( { |
33 | binary => 1, # binary for UTF-8 |
34 | sep_char => exists $opts->{'sep_char'} ? $opts->{'sep_char'} : "\t" } |
35 | ); |
36 | # TODO Handle being given a file |
37 | |
d9e873d0 |
38 | my $alignment_table; |
dfc37e38 |
39 | if( exists $opts->{'string' } ) { |
40 | my @lines = split( "\n", $opts->{'string'} ); |
41 | foreach my $l ( @lines ) { |
42 | my $status = $csv->parse( $l ); |
43 | if( $status ) { |
44 | push( @$alignment_table, [ $csv->fields ] ); |
45 | } else { |
46 | warn "Could not parse line $l: " . $csv->error_input; |
47 | } |
48 | } |
49 | } elsif( exists $opts->{'file'} ) { |
50 | open( my $fh, $opts->{'file'} ) or die "Could not open input file " . $opts->{'file'}; |
51 | while( my $row = $csv->getline( $fh ) ) { |
52 | push( @$alignment_table, $row ); |
d9e873d0 |
53 | } |
dfc37e38 |
54 | close $fh; |
55 | } else { |
56 | warn "Could not find string or file option to parse"; |
57 | return; |
d9e873d0 |
58 | } |
dfc37e38 |
59 | |
d9e873d0 |
60 | # Set up the witnesses we find in the first line |
61 | my @witnesses; |
62 | foreach my $sigil ( @{$alignment_table->[0]} ) { |
63 | my $wit = $tradition->add_witness( 'sigil' => $sigil ); |
64 | $wit->path( [ $c->start ] ); |
65 | push( @witnesses, $wit ); |
66 | } |
67 | |
68 | # Now for the next rows, make nodes as necessary, assign their ranks, and |
69 | # add them to the witness paths. |
d9e873d0 |
70 | foreach my $idx ( 1 .. $#{$alignment_table} ) { |
71 | my $row = $alignment_table->[$idx]; |
72 | my $nodes = make_nodes( $c, $row, $idx ); |
73 | foreach my $w ( 0 .. $#{$row} ) { |
74 | # push the appropriate node onto the appropriate witness path |
75 | my $word = $row->[$w]; |
76 | if( $word ) { |
77 | my $reading = $nodes->{$word}; |
78 | my $wit = $witnesses[$w]; |
79 | push( @{$wit->path}, $reading ); |
80 | } # else skip it for empty readings. |
81 | } |
82 | } |
83 | |
eca16057 |
84 | |
85 | # Collapse our lacunae into a single node and |
86 | # push the end node onto all paths. |
d9e873d0 |
87 | $c->end->rank( scalar @$alignment_table ); |
88 | foreach my $wit ( @witnesses ) { |
eca16057 |
89 | my $p = $wit->path; |
90 | my $last_rdg = shift @$p; |
91 | my $new_p = [ $last_rdg ]; |
92 | foreach my $rdg ( @$p ) { |
93 | if( $rdg->text eq '#LACUNA#' ) { |
94 | # If we are in a lacuna already, drop this node. |
95 | # Otherwise make a lacuna node and drop this node. |
96 | unless( $last_rdg->is_lacuna ) { |
97 | my $l = $c->add_lacuna( $rdg->name ); |
98 | $l->rank( $rdg->rank ); |
99 | push( @$new_p, $l ); |
100 | $last_rdg = $l; |
101 | } |
102 | $c->del_reading( $rdg ); |
103 | } else { |
104 | # No lacuna, save the reading. |
105 | push( @$new_p, $rdg ); |
106 | } |
107 | } |
108 | push( @$new_p, $c->end ); |
109 | $wit->path( $new_p ); |
d9e873d0 |
110 | } |
111 | |
112 | # Join up the paths. |
113 | $c->make_witness_paths; |
d9e873d0 |
114 | } |
115 | |
116 | sub make_nodes { |
117 | my( $collation, $row, $index ) = @_; |
118 | my %unique; |
119 | foreach my $w ( @$row ) { |
120 | $unique{$w} = 1 if $w; |
121 | } |
122 | my $ctr = 1; |
123 | foreach my $w ( keys %unique ) { |
124 | my $r = $collation->add_reading( "$index,$ctr" ); |
125 | $ctr++; |
126 | $r->rank( $index ); |
127 | $r->text( $w ); |
128 | $unique{$w} = $r; |
129 | } |
130 | return \%unique; |
131 | } |
132 | |
133 | 1; |