my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %opts );
my $format = shift( @formats );
unless( $format ) {
- warn "No data given to create a graph: need GraphML, CSV, or TEI";
- return;
+ warn "No data given to create a graph; will initialize an empty one";
}
if( $format =~ /^(CSV|CTE)$/ && !exists $opts{'base'} ) {
warn "Cannot make a graph from $format without a base text";
bless( $self, $class );
# Now do the parsing.
- my $mod = "Text::Tradition::Parser::$format";
- load( $mod );
- my @args = ( $opts{ $format } );
- if( $format =~ /^(CSV|CTE)$/ ) {
- push( @args, $opts{'base'} );
+ if( $format ) {
+ my @args;
+ if( $format =~ /^(CSV|CTE)$/ ) {
+ @args = ( 'base' => $opts{'base'},
+ 'data' => $opts{$format},
+ 'format' => $format );
+ $format = 'BaseText';
+ } else {
+ @args = ( $opts{ $format } );
+ }
+ my $mod = "Text::Tradition::Parser::$format";
+ load( $mod );
+ $mod->can('parse')->( $self, @args );
}
- $mod->can('parse')->( $self, @args );
-
return $self;
}
=cut
sub node_sequence {
- my( $self, $start, $end, $label ) = @_;
- # TODO make label able to follow a single MS
+ my( $self, $start, $end, $witness, $backup ) = @_;
unless( ref( $start ) eq 'Graph::Easy::Node'
&& ref( $end ) eq 'Graph::Easy::Node' ) {
warn "Called node_sequence without two nodes!";
return ();
}
- $label = 'base text' unless $label;
+ $witness = 'base text' unless $witness;
my @nodes = ( $start );
my %seen;
my $n = $start;
$seen{$n->name()} = 1;
my @edges = $n->outgoing();
- my @relevant_edges = grep { $_->label =~ /^$label$/ } @edges;
- warn "Did not find an edge $label from node " . $n->label
+ my @relevant_edges = grep { my @w = split( /, /, $_->label );
+ grep { /^\Q$witness\E$/ } @w } @edges;
+ unless( @relevant_edges ) {
+ @relevant_edges = grep { my @w = split( /, /, $_->label );
+ grep { /^\Q$backup\E$/ } @w } @edges
+ if $backup;
+ }
+ unless( @relevant_edges ) {
+ @relevant_edges = grep { $_->label() eq 'base text' } @edges;
+ }
+
+ warn "Did not find an edge for $witness from node " . $n->label
unless scalar @relevant_edges;
- warn "Found more than one edge $label from node " . $n->label
- unless scalar @relevant_edges == 1;
my $next = $relevant_edges[0]->to();
push( @nodes, $next );
$n = $next;
# Check that the last node is our end node.
my $last = $nodes[$#nodes];
warn "Last node found from " . $start->label() .
- " via path $label is not the end!"
+ " for witness $witness is not the end!"
unless $last eq $end;
return @nodes;
# In case this is being called for the first time.
$self->init_lemmatizer();
- if( $self->is_common( $node ) ) {
+ if( !$node || $self->is_common( $node ) ) {
# Do nothing, it's a common node.
return;
}
return @answer;
}
-# A couple of helpers. TODO These should be gathered in the same place
-# eventually
+# A couple of helpers.
sub is_common {
my( $self, $node ) = @_;
use strict;
use warnings;
-use Exporter 'import';
-use vars qw( @EXPORT_OK );
-@EXPORT_OK = qw( merge_base );
+use Module::Load;
=head1 NAME
=over
+=item B<parse>
+
+parse( $graph, %opts );
+
+Takes an initialized graph and a set of options, which must include:
+- 'base' - the base text referenced by the variants
+- 'format' - the format of the variant list
+- 'data' - the variants, in the given format.
+
+=cut
+
+sub parse {
+ my( $graph, %opts ) = @_;
+
+ my $format_mod = 'Text::Tradition::Parser::' . $opts{'format'};
+ load( $format_mod );
+ my @apparatus_entries = $format_mod->can('read')->( $opts{'data'} );
+ merge_base( $graph, $opts{'base'}, @apparatus_entries );
+}
+
=item B<merge_base>
merge_base( $graph, 'reference.txt', @apparatus_entries )
my( $graph, $base_file, @app_entries ) = @_;
my @base_line_starts = read_base( $base_file, $graph );
+ my %all_witnesses;
foreach my $app ( @app_entries ) {
my( $line, $num ) = split( /\./, $app->{_id} );
# DEBUG with a short graph
# Determine the label name for the edges here.
my $edge_name = join(', ', @mss );
+ @all_witnesses{ @mss } = ( 1 ) x scalar( @mss );
# Make the variant into a set of nodes.
my $ctr = 0;
## read. So I collapse nodes that have only one edge in and one edge
## out, and I do this by looking at the edges.
- foreach my $edge ( $graph->edges() ) {
- my @out_edges = $edge->from()->outgoing();
- my @in_edges = $edge->to()->incoming();
+# foreach my $edge ( $graph->edges() ) {
+# my @out_edges = $edge->from()->outgoing();
+# my @in_edges = $edge->to()->incoming();
- next unless scalar( @out_edges ) == 1;
- next unless scalar( @in_edges ) == 1;
- next unless $out_edges[0] eq $in_edges[0];
- # In theory if we've got this far, we're safe, but just to
- # double-check...
- next unless $out_edges[0] eq $edge;
+# next if $edge->from() eq $graph->start();
+# next if $edge->to()->name() eq '#END#';
+# next unless scalar( @out_edges ) == 1;
+# next unless scalar( @in_edges ) == 1;
+# next unless $out_edges[0] eq $in_edges[0];
+# # In theory if we've got this far, we're safe, but just to
+# # double-check...
+# next unless $out_edges[0] eq $edge;
- $graph->merge_nodes( $edge->from(), $edge->to(), ' ' );
+# $graph->merge_nodes( $edge->from(), $edge->to(), ' ' );
+# }
+
+ # Now walk the path for each witness, so that we can do the
+ # position calculations.
+ my $paths = {};
+ foreach my $w ( keys %all_witnesses ) {
+ my $back = undef;
+ if( $w =~ /^(.*)\s*\(p\.\s*c\.\)/ ) {
+ $back = $1;
+ }
+ my @wit_nodes = $graph->node_sequence( $graph->start,
+ $graph->node( '#END#' ),
+ $w, $back );
+ my @wn_names = map { $_->name() } @wit_nodes;
+ $paths->{$w} = \@wn_names;
}
+ $DB::single = 1;
+ my @common_nodes = grep { $graph->is_common( $_ ) } $graph->nodes();
+ $graph->make_positions( \@common_nodes, $paths );
}
=item B<read_base>
my( $graph, @readings ) = @_;
my $lemma_start = shift @readings;
my $lemma_end = shift @readings;
- my $detranspose = 1;
+ my $detranspose = 0;
# Start the list of distinct nodes with those nodes in the lemma.
my @distinct_nodes;
$graph->prior_word( $l, $edgelabel ), $l );
remove_duplicate_edges( $graph, $l,
$graph->next_word( $l, $edgelabel ) );
+ last if $matched;
}
}
push( @remaining_nodes, [ $w, $var_label ] ) unless $matched;
use strict;
use warnings;
use Text::CSV::Simple;
-use Text::Tradition::Parser::BaseText qw( merge_base );
=head1 NAME
=over
-=item B<parse>
+=item B<read>
-parse( $graph, 'variants.csv', 'reference.txt' );
+my @apparatus = read( $csv_file );
-Takes an initialized Text::Tradition::Graph object and the relevant
-data files; puts the text and its variants onto the graph.
+Takes a CSV file; returns a data structure of apparatus entries to be
+merged with a base text.
=cut
-sub parse {
- my( $graph, $csv_file, $base_text ) = @_;
-
- # Parse the CSV file into a list of apparatus entries.
- my @app_list = _read_csv( $csv_file );
- # Now put the base text onto the graph, and merge in the
- # apparatus entries.
- merge_base( $graph, $base_text, @app_list );
-}
-
-# Takes a CSV file; returns a data structure of apparatus entries to
-# be merged with a base text.
-
-sub _read_csv {
+sub read {
my( $csv_file ) = @_;
my $parser = Text::CSV::Simple->new();
my @fields = qw/ reference text variant type context non_corr non_indep