1 package Text::Tradition::Parser::CSV;
6 use Text::Tradition::Parser::BaseText qw( merge_base );
10 Text::Tradition::Parser::CSV
14 Parser module for Text::Tradition, given a list of variants as a CSV
15 file and a reference text as a plaintext file with appropriate line
24 parse( $graph, 'variants.csv', 'reference.txt' );
26 Takes an initialized Text::Tradition::Graph object and the relevant
27 data files; puts the text and its variants onto the graph.
32 my( $graph, $csv_file, $base_text ) = @_;
34 # Parse the CSV file into a list of apparatus entries.
35 my @app_list = _read_csv( $csv_file );
36 # Now put the base text onto the graph, and merge in the
38 merge_base( $graph, $base_text, @app_list );
41 # Takes a CSV file; returns a data structure of apparatus entries to
42 # be merged with a base text.
46 my $parser = Text::CSV::Simple->new();
47 my @fields = qw/ reference text variant type context non_corr non_indep
48 length total origin /;
49 my @lines = $parser->read_file( $ARGV[0] );
50 my @labels = @{shift( @lines )};
51 push( @fields, @labels[10..$#labels] );
57 foreach my $line ( @lines ) {
59 if( $line->[0] =~ /^\d/ ) {
60 $new_lemma = $started = 1;
64 # Get the lines into their fields.
66 @linehash{@fields} = @$line;
68 # Readings can take up multiple lines in the CSV, so append the
69 # apparatus to the list, and clear it out, if we have started a
72 push( @app_list, $apparatus ) if keys %$apparatus;
73 $apparatus = { _id => $linehash{reference},
77 # The apparatus has multiple readings, and multiple witnesses per
78 # reading. So it's a hashref whose values are listrefs.
79 $apparatus->{ 'rdg_0' } = $linehash{ 'text' }
80 if $linehash{ 'text' };
81 $apparatus->{ 'rdg_' . ++$rdg_ctr } = $linehash{ 'variant' };
82 foreach my $attr ( @fields[3..8] ) {
83 $apparatus->{"_rdg_${rdg_ctr}_$attr"} = $linehash{ $attr }
84 if $linehash{ $attr };
87 foreach my $k ( @fields[10..$#fields] ) {
88 my $variant_rdg = $linehash{$k};
89 $k =~ s/\s+\(a\.c\.\)//;
90 if( $variant_rdg =~ /^0/ ) {
91 $apparatus->{ $k } = 'rdg_0'
92 unless exists $apparatus->{ $k };
93 } elsif ( $variant_rdg =~ /^1/ ) {
94 $apparatus->{ $k } = 'rdg_' . $rdg_ctr;
95 } else { # else for $, we don't list the MS
96 warn "Unparsed variant indicator $variant_rdg for $k in " .
98 unless ( !$variant_rdg or $variant_rdg =~ /^\$$/ );
101 # See if we have at least one reading for each variant.
102 my @seen_rdgs = values %$apparatus;
103 foreach my $rdg ( grep { $_ =~ /^rdg/ } keys %$apparatus ) {
104 unless( grep { $_ =~ /^$rdg$/ } @seen_rdgs ) {
105 print STDERR 'No manuscript found with reading "'
106 . $apparatus->{$rdg} .
107 '" at location ' . $apparatus->{_id} . "\n";
108 # delete $apparatus->{$rdg}; # for now
112 # Done with loop, so push the last apparatus.
113 push( @app_list, $apparatus );
121 This package is free software and is provided "as is" without express
122 or implied warranty. You can redistribute it and/or modify it under
123 the same terms as Perl itself.
127 Tara L Andrews, aurum@cpan.org