$self->_save_collation( $collation );
# Call the appropriate parser on the given data
- my @formats = grep { /^(Self|CollateX|CSV|CTE|KUL|TEI)$/ } keys( %$init_args );
+ my @formats = grep { /^(Self|CollateX|CSV|CTE|KUL|TEI|Tabular)$/ } keys( %$init_args );
my $format = shift( @formats );
unless( $format ) {
warn "No data given to create a collation; will initialize an empty one";
use IPC::Run qw( run binary );
use Text::CSV_XS;
use Text::Tradition::Collation::Path;
-use Text::Tradition::Collation::Position;
use Text::Tradition::Collation::Reading;
use Text::Tradition::Collation::Relationship;
use Text::Tradition::Collation::Segment;
my @cmd = qw/dot -Tsvg/;
my( $svg, $err );
my $dotfile = File::Temp->new();
+ ## TODO REMOVE
+ $dotfile->unlink_on_destroy(0);
binmode $dotfile, ':utf8';
print $dotfile $self->as_dot();
push( @cmd, $dotfile->filename );
# Add the data keys for nodes
my %node_data_keys;
my $ndi = 0;
- foreach my $datum ( qw/ name reading identical position class / ) {
+ foreach my $datum ( qw/ name reading identical rank class / ) {
$node_data_keys{$datum} = 'dn'.$ndi++;
my $key = $root->addNewChild( $graphml_ns, 'key' );
$key->setAttribute( 'attr.name', $datum );
$node_el->setAttribute( 'id', $node_xmlid );
_add_graphml_data( $node_el, $node_data_keys{'name'}, $n->name );
_add_graphml_data( $node_el, $node_data_keys{'reading'}, $n->label );
- _add_graphml_data( $node_el, $node_data_keys{'position'}, $n->position->reference )
- if $n->has_position;
+ _add_graphml_data( $node_el, $node_data_keys{'rank'}, $n->rank )
+ if $n->has_rank;
_add_graphml_data( $node_el, $node_data_keys{'class'}, $n->sub_class );
_add_graphml_data( $node_el, $node_data_keys{'identical'}, $n->primary->name )
if $n->has_primary;
$self->graph->rename_node( $new_start, '#START#' );
}
# Make sure the start node has a start position.
- unless( $self->reading( '#START#' )->has_position ) {
- $self->reading( '#START#' )->position( '0,0' );
+ unless( $self->reading( '#START#' )->has_rank ) {
+ $self->reading( '#START#' )->rank( '0' );
}
return $self->reading('#START#');
}
foreach my $idx ( 0 .. $#chain-1 ) {
$self->add_path( $chain[$idx], $chain[$idx+1], $sig );
}
- @chain = @{$wit->uncorrected_path};
- foreach my $idx( 0 .. $#chain-1 ) {
- my $source = $chain[$idx];
- my $target = $chain[$idx+1];
- $self->add_path( $source, $target, $sig.$self->ac_label )
- unless $self->has_path( $source, $target, $sig );
+ if( $wit->has_ante_corr ) {
+ @chain = @{$wit->uncorrected_path};
+ foreach my $idx( 0 .. $#chain-1 ) {
+ my $source = $chain[$idx];
+ my $target = $chain[$idx+1];
+ $self->add_path( $source, $target, $sig.$self->ac_label )
+ unless $self->has_path( $source, $target, $sig );
+ }
}
}
# TODO think about indexing this.
sub readings_at_position {
my( $self, $position, $strict ) = @_;
- unless( ref( $position ) eq 'Text::Tradition::Collation::Position' ) {
- $position = Text::Tradition::Collation::Position->new( $position );
- }
my @answer;
foreach my $r ( $self->readings ) {
push( @answer, $r ) if $r->is_at_position( $position, $strict );
sub common_readings {
my $self = shift;
my @common = grep { $_->is_common } $self->readings();
- return sort { $a->position->cmp_with( $b->position ) } @common;
+ return sort { $a->rank <=> $b->rank } @common;
}
=item B<lemma_readings>
$collation->end( $gnode );
}
}
+
+ # TODO Need to populate $wit->path / uncorrected_path
# Now we have added the witnesses and their paths, so we can
# calculate their explicit positions.
# TODO CollateX does this, and we should just have it exported there.
- $collation->calculate_positions();
+ $collation->calculate_ranks();
}
=back
=cut
-my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $POSITION_KEY, $CLASS_KEY )
- = qw/ name reading identical position class /;
+my( $IDKEY, $TOKENKEY, $TRANSPOS_KEY, $RANK_KEY, $CLASS_KEY )
+ = qw/ name reading identical rank class /;
sub parse {
my( $tradition, $graphml_str ) = @_;
# We evidently have a linear graph.
$linear = 1;
$this_reading->set_identical( $other_reading );
- } elsif ( $edkey eq $POSITION_KEY ) {
- $this_reading->position( $extra_data->{$nkey}->{$edkey} );
+ } elsif ( $edkey eq $RANK_KEY ) {
+ $this_reading->rank( $extra_data->{$nkey}->{$edkey} );
} else {
warn "Unfamiliar reading node data $edkey for $nkey";
}
}
}
$collation->linear( $linear );
-
- # We know what the beginning and ending nodes are, no need to
- # search or reset.
- my $end_node = $collation->reading( '#END#' );
- # Walk the paths and make reading sequences for our witnesses.
- # No need to calculate positions as we have them already.
- $collation->walk_witness_paths( $end_node );
+ # TODO We probably need to set the $witness->path arrays for each wit.
}
=back
--- /dev/null
+package Text::Tradition::Parser::Tabular;
+
+use strict;
+use warnings;
+use Text::CSV_XS;
+
+=head1 NAME
+
+Text::Tradition::Parser::Tabular
+
+=head1 DESCRIPTION
+
+Parser module for Text::Tradition to read an alignment table format, such as CSV.
+
+=head1 METHODS
+
+=over
+
+=item B<parse>
+
+parse( $graph, $graphml_string );
+
+Takes an initialized Text::Tradition::Graph object and a string
+containing the GraphML; creates the appropriate nodes and edges on the
+graph.
+
+=cut
+
+sub parse {
+ my( $tradition, $tab_str ) = @_;
+ # TODO Allow setting of sep_char
+ my $c = $tradition->collation; # shorthand
+ my $csv = Text::CSV_XS->new( { binary => 1 } ); # binary for UTF-8
+ my @lines = split( "\n", $tab_str );
+ # Conveniently, we are basically receiving exactly the sort of alignment table
+ # we might want to produce later. May as well save it.
+ my $alignment_table;
+ foreach my $l ( @lines ) {
+ my $status = $csv->parse( $l );
+ if( $status ) {
+ push( @$alignment_table, [ $csv->fields ] );
+ } else {
+ warn "Could not parse line $l: " . $csv->error_input;
+ }
+ }
+
+ # Set up the witnesses we find in the first line
+ my @witnesses;
+ foreach my $sigil ( @{$alignment_table->[0]} ) {
+ my $wit = $tradition->add_witness( 'sigil' => $sigil );
+ $wit->path( [ $c->start ] );
+ push( @witnesses, $wit );
+ }
+
+ # Now for the next rows, make nodes as necessary, assign their ranks, and
+ # add them to the witness paths.
+ $DB::single = 1;
+ foreach my $idx ( 1 .. $#{$alignment_table} ) {
+ my $row = $alignment_table->[$idx];
+ my $nodes = make_nodes( $c, $row, $idx );
+ foreach my $w ( 0 .. $#{$row} ) {
+ # push the appropriate node onto the appropriate witness path
+ my $word = $row->[$w];
+ if( $word ) {
+ my $reading = $nodes->{$word};
+ my $wit = $witnesses[$w];
+ push( @{$wit->path}, $reading );
+ } # else skip it for empty readings.
+ }
+ }
+
+ # Push the end node onto all paths.
+ $c->end->rank( scalar @$alignment_table );
+ foreach my $wit ( @witnesses ) {
+ push( @{$wit->path}, $c->end );
+ }
+
+ # Join up the paths.
+ $c->make_witness_paths;
+
+ # Save the alignment table that was so handily provided to us.
+ # TODO if we support other delimiters, we will have to re-export this
+ # rather than saving the original string.
+ $c->_save_csv( $tab_str );
+}
+
+sub make_nodes {
+ my( $collation, $row, $index ) = @_;
+ my %unique;
+ foreach my $w ( @$row ) {
+ $unique{$w} = 1 if $w;
+ }
+ my $ctr = 1;
+ foreach my $w ( keys %unique ) {
+ my $r = $collation->add_reading( "$index,$ctr" );
+ $ctr++;
+ $r->rank( $index );
+ $r->text( $w );
+ $unique{$w} = $r;
+ }
+ return \%unique;
+}
+
+1;
\ No newline at end of file
help();
}
-unless( $informat =~ /^(CSV|CTE|KUL|Self|TEI|CollateX)$/i ) {
+unless( $informat =~ /^(CSV|CTE|KUL|Self|TEI|CollateX|tab(ular)?)$/i ) {
help( "Input format must be one of CollateX, CSV, CTE, Self, TEI" );
}
$informat = 'CollateX' if $informat =~ /^c(ollate)?x$/i;
$informat = 'CTE' if $informat =~ /^cte$/i;
$informat = 'Self' if $informat =~ /^self$/i;
$informat = 'TEI' if $informat =~ /^tei$/i;
+$informat = 'Tabular' if $informat =~ /^tab$/i;
unless( $outformat =~ /^(graphml|svg|dot|stemma|csv)$/ ) {
help( "Output format must be one of graphml, svg, csv, stemma, or dot" );
unless( $informat eq 'KUL' || $informat eq 'CSV' ) {
my @lines;
open( INFILE, "$input" ) or die "Could not read $input";
+ binmode INFILE, ':utf8';
@lines = <INFILE>;
close INFILE;
$input = join( '', @lines );