X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FParser%2FTabular.pm;h=9daed142b2c5f5c5b3bbad0e38471bb813affaed;hb=3b853983204d888a90c029c1e66d77b9fa9642b5;hp=2cf17d093f7789af96d97f124c701cede0ef3fb3;hpb=25331c4994b1ab7b9e4a90c8cc7ef9563a7ecbc4;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Parser/Tabular.pm b/lib/Text/Tradition/Parser/Tabular.pm index 2cf17d0..9daed14 100644 --- a/lib/Text/Tradition/Parser/Tabular.pm +++ b/lib/Text/Tradition/Parser/Tabular.pm @@ -8,6 +8,24 @@ use Text::CSV_XS; Text::Tradition::Parser::Tabular +=head1 SYNOPSIS + + use Text::Tradition; + + my $t_from_file = Text::Tradition->new( + 'name' => 'my text', + 'input' => 'Tabular', + 'file' => '/path/to/collation.csv', + 'sep_char' => ',' + ); + + my $t_from_string = Text::Tradition->new( + 'name' => 'my text', + 'input' => 'Tabular', + 'string' => $tab_separated_collation, + 'sep_char' => "\t", + ); + =head1 DESCRIPTION Parser module for Text::Tradition to read an alignment table format, such as CSV. @@ -16,13 +34,48 @@ Parser module for Text::Tradition to read an alignment table format, such as CSV =over -=item B +=item B( $tradition, $option_hash ) + +Takes an initialized tradition and a set of options; creates the +appropriate nodes and edges on the graph, as well as the appropriate +witness objects. The $option_hash must contain either a 'file' or a +'string' argument with the table to be parsed; it may also contain a +'sep_char' argument to specify how the fields are separated. + +The table should have witnesses arranged in columns, with the witness sigla +in the first row. Empty cells are interpreted as omissions (and thus +stemmatologically relevant.) Longer lacunae in the text, to be disregarded +in cladistic analysis, may be represented by filling the appropriate cells +with the tag '#LACUNA#'. + +If a witness name ends in the collation's ac_label, it will be treated as +an 'ante-correction' version of the 'main' witness whose sigil it shares. + +=begin testing + +use Text::Tradition; +binmode STDOUT, ":utf8"; +binmode STDERR, ":utf8"; +eval { no warnings; binmode $DB::OUT, ":utf8"; }; + +my $csv = 't/data/florilegium.csv'; +my $t = Text::Tradition->new( + 'name' => 'inline', + 'input' => 'Tabular', + 'file' => $csv, + 'sep_char' => ',', + ); -parse( $graph, $graphml_string ); +is( ref( $t ), 'Text::Tradition', "Parsed florilegium CSV file" ); -Takes an initialized Text::Tradition::Graph object and a string -containing the GraphML; creates the appropriate nodes and edges on the -graph. +### TODO Check these figures +if( $t ) { + is( scalar $t->collation->readings, 313, "Collation has all readings" ); + is( scalar $t->collation->paths, 2877, "Collation has all paths" ); + is( scalar $t->witnesses, 13, "Collation has all witnesses" ); +} + +=end testing =cut @@ -33,7 +86,6 @@ sub parse { binary => 1, # binary for UTF-8 sep_char => exists $opts->{'sep_char'} ? $opts->{'sep_char'} : "\t" } ); - # TODO Handle being given a file my $alignment_table; if( exists $opts->{'string' } ) { @@ -59,10 +111,15 @@ sub parse { # Set up the witnesses we find in the first line my @witnesses; + my %ac_wits; # Track these for later removal foreach my $sigil ( @{$alignment_table->[0]} ) { my $wit = $tradition->add_witness( 'sigil' => $sigil ); $wit->path( [ $c->start ] ); push( @witnesses, $wit ); + my $aclabel = $c->ac_label; + if( $sigil =~ /^(.*)\Q$aclabel\E$/ ) { + $ac_wits{$1} = $wit; + } } # Now for the next rows, make nodes as necessary, assign their ranks, and @@ -109,6 +166,16 @@ sub parse { $wit->path( $new_p ); } + # Fold any a.c. witnesses into their main witness objects, and + # delete the independent a.c. versions. + foreach my $a ( keys %ac_wits ) { + my $main_wit = $tradition->witness( $a ); + next unless $main_wit; + my $ac_wit = $ac_wits{$a}; + $main_wit->uncorrected_path( $ac_wit->path ); + $tradition->del_witness( $ac_wit ); + } + # Join up the paths. $c->make_witness_paths; } @@ -130,4 +197,14 @@ sub make_nodes { return \%unique; } -1; \ No newline at end of file +1; + +=head1 LICENSE + +This package is free software and is provided "as is" without express +or implied warranty. You can redistribute it and/or modify it under +the same terms as Perl itself. + +=head1 AUTHOR + +Tara L Andrews Eaurum@cpan.orgE