=head1 SYNOPSIS
-use Text::Tradition::Graph;
+ use Text::Tradition::Graph;
-my $text = Text::Tradition::Graph->new( 'GraphML' => '/my/graphml/file.xml' );
-my $text = Text::Tradition::Graph->new( 'TEI' => '/my/tei/file.xml' );
-my $text = Text::Tradition::Graph->new( 'CSV' => '/my/csv/file.csv',
- 'base' => '/my/basefile.txt' );
-my $text = Text::Tradition::Graph->new( 'CTE' => '/my/cte/file.txt',
- 'base' => '/my/basefile.txt' );
+ my $text = Text::Tradition::Graph->new( 'GraphML' => '/my/graphml/file.xml' );
+ my $text = Text::Tradition::Graph->new( 'TEI' => '/my/tei/file.xml' );
+ my $text = Text::Tradition::Graph->new( 'CSV' => '/my/csv/file.csv',
+ 'base' => '/my/basefile.txt' );
+ my $text = Text::Tradition::Graph->new( 'CTE' => '/my/cte/file.txt',
+ 'base' => '/my/basefile.txt' );
-my $svg_string = $text->as_svg();
+ my $svg_string = $text->as_svg();
-my $lemma_nodes = $text->active_nodes();
-$text->toggle_node( 'some_word' );
+ my $lemma_nodes = $text->active_nodes();
+ $text->toggle_node( 'some_word' );
=head1 DESCRIPTION
Constructor. Takes a source collation file from which to construct
the initial graph. This file can be TEI (parallel segmentation) XML,
-CSV in a format yet to be documented, GraphML as documented (someday)
-by CollateX, or a Classical Text Editor apparatus. For CSV and
-Classical Text Editor files, the user must also supply a base text to
-which the line numbering in the collation file refers.
+CSV in a format yet to be documented, GraphML as documented by the
+CollateX tool (L<http://gregor.middell.net/collatex/>), or a Classical
+Text Editor apparatus. For CSV and Classical Text Editor files, the
+user must also supply a base text to which the line numbering in the
+collation file refers.
+
+20/04/2011 Currently only CSV and GraphML are really supported.
=cut
return $self;
}
+=item B<make_positions>
+
+$graph->make_positions( $common_nodes, $paths )
+
+Create an associated Graph::Positions object that records the position
+of each node in the graph. This method call is probably in the wrong
+place and will move.
+
+=cut
+
+sub make_positions {
+ my( $self, $common_nodes, $paths ) = @_;
+ my $positions = Text::Tradition::Graph::Position->new( $common_nodes, $paths );
+ $self->{'positions'} = $positions;
+}
+
+=back
+
+=head2 Graph::Easy object accessor methods
+
+See the Graph::Easy documentation for descriptions of these functions.
+
+=over
+
+=item B<node>
+
+=cut
-### Graph::Easy object accessor methods
sub node {
my $self = shift;
return $self->{'graph'}->node( @_ );
}
+=item B<edge>
+
+=cut
+
sub edge {
my $self = shift;
return $self->{'graph'}->edge( @_ );
}
+=item B<add_node>
+
+=cut
+
# Not only adds the node, but also initializes internal data
# about the node.
+
sub add_node {
my $self = shift;
my $node = $self->{'graph'}->add_node( @_ );
return $node;
}
+=item B<add_edge>
+
+=cut
+
sub add_edge {
my $self = shift;
return $self->{'graph'}->add_edge( @_ );
}
+=item B<del_node>
+
+=cut
+
sub del_node {
my $self = shift;
my $node = $_[0];
return $self->{'graph'}->del_node( @_ );
}
+=item B<del_edge>
+
+=cut
+
sub del_edge {
my $self = shift;
return $self->{'graph'}->del_edge( @_ );
}
+=item B<nodes>
+
+=cut
+
sub nodes {
my $self = shift;
return $self->{'graph'}->nodes( @_ );
}
+=item B<edges>
+
+=cut
+
sub edges {
my $self = shift;
return $self->{'graph'}->edges( @_ );
}
+=item B<merge_nodes>
+
+=cut
+
sub merge_nodes {
my $self = shift;
return $self->{'graph'}->merge_nodes( @_ );
### Helper methods for navigating the tree
+=back
+
+=head2 Graph navigation methods
+
+=over
+
+=item B<start>
+
+my $node = $graph->start();
+
+Returns the beginning node of the graph.
+
+=cut
+
sub start {
# Return the beginning node of the graph.
my $self = shift;
return $self->{'graph'}->node('#START#');
}
-# Record that nodes A and B are really the same (transposed) node.
-# We do this by maintaining some pools of transposed nodes, and
-# we have a lookup hash so that each member of that
-sub set_identical_node {
- my( $self, $node, $same_node ) = @_;
- my $pool = $self->{'identical_nodes'}->{ $node };
- my $same_pool = $self->{'identical_nodes'}->{ $same_node };
- my %poolhash;
- foreach ( @$pool ) {
- $poolhash{$_} = 1;
- }
- foreach( @$same_pool ) {
- push( @$pool, $_ ) unless $poolhash{$_};
- }
+=item B<next_word>
- $self->{'identical_nodes'}->{ $same_node } = $pool;
-}
+my $next_node = $graph->next_word( $node, $path );
-sub identical_nodes {
- my( $self, $node ) = @_;
- my @others = grep { $_ !~ /^$node$/ }
- @{$self->{'identical_nodes'}->{ $node }};
- return @others;
-}
+Returns the node that follows the given node along the given witness
+path. TODO These are badly named.
+
+=cut
sub next_word {
# Return the successor via the corresponding edge.
return undef;
}
+=item B<prior_word>
+
+my $prior_node = $graph->prior_word( $node, $path );
+
+Returns the node that precedes the given node along the given witness
+path. TODO These are badly named.
+
+=cut
+
sub prior_word {
# Return the predecessor via the corresponding edge.
my( $self, $node, $edge ) = @_;
return undef;
}
+=item B<node_sequence>
+
+my @nodes = $graph->node_sequence( $first, $last, $path );
+
+Returns the ordered list of nodes, starting with $first and ending
+with $last, along the given witness path.
+
+=cut
+
sub node_sequence {
my( $self, $start, $end, $label ) = @_;
# TODO make label able to follow a single MS
return @nodes;
}
+=item B<string_lemma>
+
+my $text = $graph->string_lemma( $first, $last, $path );
+
+Returns the whitespace-separated text, starting with $first and ending
+with $last, represented in the graph along the given path.
+
+=cut
+
sub string_lemma {
my( $self, $start, $end, $label ) = @_;
return join( ' ', @words );
}
-## Output. We use GraphViz for the layout because it handles large
-## graphs better than Graph::Easy does natively.
+=back
+
+=head2 Transposition handling methods
+
+These should really move to their own module. For use when the graph
+has split transposed nodes in order to avoid edges that travel
+backward.
+
+=over
+
+=item B<set_identical_node>
+
+$graph->set_identical_node( $node, $other_node )
+
+Tell the graph that these two nodes contain the same (transposed) reading.
+
+=cut
+
+sub set_identical_node {
+ my( $self, $node, $same_node ) = @_;
+ my $pool = $self->{'identical_nodes'}->{ $node };
+ my $same_pool = $self->{'identical_nodes'}->{ $same_node };
+ my %poolhash;
+ foreach ( @$pool ) {
+ $poolhash{$_} = 1;
+ }
+ foreach( @$same_pool ) {
+ push( @$pool, $_ ) unless $poolhash{$_};
+ }
+
+ $self->{'identical_nodes'}->{ $same_node } = $pool;
+}
+
+=item B<set_identical_node>
+
+my @nodes = $graph->identical_nodes( $node )
+
+Get a list of nodes that contain the same (transposed) reading as the
+given node.
+
+=cut
+
+sub identical_nodes {
+ my( $self, $node ) = @_;
+ my @others = grep { $_ !~ /^$node$/ }
+ @{$self->{'identical_nodes'}->{ $node }};
+ return @others;
+}
+
+=back
+
+=head2 Output method(s)
+
+=over
+
+=item B<as_svg>
+
+print $graph->as_svg( $recalculate );
+
+Returns an SVG string that represents the graph. Uses GraphViz to do
+this, because Graph::Easy doesn't cope well with long graphs. Unless
+$recalculate is passed (and is a true value), the method will return a
+cached copy of the SVG after the first call to the method.
+
+=cut
sub as_svg {
my( $self, $recalc ) = @_;
return $svg;
}
-## Methods for lemmatizing a text.
+=back
+
+=head2 Lemmatization methods
+
+=over
+
+=item B<init_lemmatizer>
+
+=cut
sub init_lemmatizer {
my $self = shift;
}
-sub make_positions {
- my( $self, $common_nodes, $paths ) = @_;
- my $positions = Text::Tradition::Graph::Position->new( $common_nodes, $paths );
- $self->{'positions'} = $positions;
-}
-
# Takes a list of nodes that have just been turned off, and returns a
# set of tuples of the form ['node', 'state'] that indicates what
# changes need to be made to the graph.
return keys( %h );
}
+=back
+
+=head1 LICENSE
+
+This package is free software and is provided "as is" without express
+or implied warranty. You can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Tara L Andrews, aurum@cpan.org
+
+=cut
+
1;
=head1 SUMMARY
An object to go with a text graph that keeps track of relative
-positions of the nodes.
+positions of the nodes on that graph. This is useful for keeping
+track of which readings are variants of each other, which is expensive
+to calculate every time from the graph itself.
=head1 METHODS
=cut
+# TODO Why not just hand over the graph and calculate the common nodes
+# and witness paths here?
sub new {
my $proto = shift;
my( $common_nodes, $witness_paths ) = @_;
return $self;
}
+=item B<node_position>
+
+my $pos = $positions->node_position( $node );
+
+Returns the position identifier for a given node in the graph.
+
+=cut
+
sub node_position {
my( $self, $node ) = @_;
$node = _name( $node );
return $self->{'node_positions'}->{ $node };
}
+=item B<nodes_at_position>
+
+my @nodes = $positions->nodes_at_position( $pos );
+
+Returns the names of all the nodes in the graph at a given position.
+
+=cut
+
sub nodes_at_position {
my( $self, $pos ) = @_;
- my $positions = $self->calc_positions();
+ my $positions = $self->_calc_positions();
unless( exists $positions->{ $pos } ) {
warn "No position $pos in the graph";
return;
return @{ $positions->{ $pos }};
}
+=item B<colocated_nodes>
+
+my @nodes = $positions->colocated_nodes( $node );
+
+Returns the names of all the nodes in the graph at the same position
+as the node given, apart from that node itself.
+
+=cut
+
sub colocated_nodes {
my( $self, $node ) = @_;
$node = _name( $node );
return @cn;
}
-# Returns an ordered list of positions in this graph
+=item B<all>
+
+my @position_list = $positions->all()
+
+Returns an ordered list of positions in the graph.
+
+=cut
+
sub all {
my( $self ) = @_;
- my $pos = $self->calc_positions;
+ my $pos = $self->_calc_positions;
return sort by_position keys( %$pos );
}
-# Returns undef if no decision has been taken on this position, the
-# node name if there is a lemma for it, and 0 if there is no lemma for
-# it.
-sub state {
- my( $self, $pos ) = @_;
- return $self->{'position_state'}->{ $pos };
+sub witness_path {
+ my( $self, $wit ) = @_;
+ return @{$self->{'witness_paths'}->{ $wit }};
}
-sub set_state {
- my( $self, $pos, $state ) = @_;
- $self->{'position_state'}->{ $pos } = $state;
-}
+=back
+
+=head2 Lemmatization functions
+
+For some traditions, each position will have at least one node that is
+the 'lemma text', that is, the text that an editor has chosen to stand
+as authoritative for the tradition. The following methods keep
+track of what lemma, if any, should stand at each position.
+
+=over
+=item B<init_lemmatizer>
+
+$positions->init_lemmatizer( @nodelist )
+
+Sets up the necessary logic for keeping track of lemmas. It should be
+called once, with the initial list of lemmas.
+
+=cut
+
+# TODO We can initialize this without the argument, based on the passed
+# list of common nodes.
sub init_lemmatizer {
my( $self, @nodes ) = @_;
foreach my $n ( @nodes ) {
}
}
-sub witness_path {
- my( $self, $wit ) = @_;
- return @{$self->{'witness_paths'}->{ $wit }};
-}
+=item B<state>
-# At some point I may find myself using scalar references for the node
-# positions, in order to keep them easily in sync. Just in case, I will
-# calculate this every time I need it.
-sub calc_positions {
- my $self = shift;
- return _invert_hash( $self->{'node_positions'} )
+my $answer = $positions->state( $position_id )
+
+For the given position ID, returns the node (if any) that stands at
+the lemma. If no node should stand as lemma at this position, returns
+0; if no decision has been made for this position, returns undef.
+
+=cut
+
+sub state {
+ my( $self, $pos ) = @_;
+ return $self->{'position_state'}->{ $pos };
}
-# Helper for dealing with node refs
-sub _name {
- my( $node ) = @_;
- # We work with node names in this library
- if( ref( $node ) && ref( $node ) eq 'Graph::Easy::Node' ) {
- $node = $node->name();
- }
- return $node;
+=item B<set_state>
+
+$positions->set_state( $position_id, $state )
+
+For the given position ID, sets the lemma (if any). State can be the
+name of a node, 0 (for cases when no lemma should stand), or undef
+(for cases when no decision has been made).
+
+=cut
+
+sub set_state {
+ my( $self, $pos, $state ) = @_;
+ $self->{'position_state'}->{ $pos } = $state;
}
-### Comparison functions
+=back
+
+=head2 Comparison function
+
+=over
+
+=item B<by_position>
+
+my @nodelist = sort $positions->by_position @nodelist;
+
+For use in the 'sort' function. Returns a comparison value based on
+the position of the given nodes.
+
+=cut
# Compares two nodes according to their positions in the witness
# index hash.
return $pos_a[1] <=> $pos_b[1];
}
+
+#### HELPER FUNCTIONS ####
+
+# At some point I may find myself using scalar references for the node
+# positions, in order to keep them easily in sync. Just in case, I will
+# calculate this every time I need it.
+sub _calc_positions {
+ my $self = shift;
+ return _invert_hash( $self->{'node_positions'} )
+}
+
+# Helper for dealing with node refs
+sub _name {
+ my( $node ) = @_;
+ # We work with node names in this library
+ if( ref( $node ) && ref( $node ) eq 'Graph::Easy::Node' ) {
+ $node = $node->name();
+ }
+ return $node;
+}
+
# Useful helper. Will be especially useful if I find myself using
# scalar references for the positions after all - it can dereference
# them here.
return \%new_hash;
}
+=back
+
+=head1 LICENSE
+
+This package is free software and is provided "as is" without express
+or implied warranty. You can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Tara L Andrews, aurum@cpan.org
+
+=cut
+
1;
use vars qw( @EXPORT_OK );
@EXPORT_OK = qw( merge_base );
+=head1 NAME
+
+Text::Tradition::Parser::BaseText
+
+=head1 SYNOPSIS
+
+use Text::Tradition::Parser::BaseText qw( merge_base );
+merge_base( $graph, 'reference.txt', @apparatus_entries )
+
+=head1 DESCRIPTION
+
+For an overview of the package, see the documentation for the
+Text::Tradition::Graph module.
+
+This module is meant for use with certain of the other Parser classes
+- whenever a list of variants is given with reference to a base text,
+these must be joined into a single collation. The parser should
+therefore make a list of variants and their locations, and BaseText
+will join those listed variants onto the reference text.
+
+=head1 SUBROUTINES
+
+=over
+
+=item B<merge_base>
+
+merge_base( $graph, 'reference.txt', @apparatus_entries )
+
+Takes three arguments: a newly-initialized Text::Tradition::Graph
+object, a text file containing the reference text, and a list of
+variants (apparatus entries). Adds the base text to the graph, and
+joins the variants to that.
+
+The list of variants is an array of hash references; each hash takes
+the form
+ { '_id' => line reference,
+ 'rdg_0' => lemma reading,
+ 'rdg_1' => first variant,
+ ... # and so on until all distinct readings are listed
+ 'WitnessA' => 'rdg_0',
+ 'WitnessB' => 'rdg_1',
+ ... # and so on until all witnesses are listed with their readings
+ }
+
+Any hash key that is not of the form /^rdg_\d+$/ and that does not
+begin with an underscore is assumed to be a witness name. Any 'meta'
+information to be passed must be passed in a key with a leading
+underscore in its name.
+
+=cut
+
sub merge_base {
my( $graph, $base_file, @app_entries ) = @_;
my @base_line_starts = read_base( $base_file, $graph );
my( $line, $num ) = split( /\./, $app->{_id} );
# DEBUG with a short graph
# last if $line > 2;
- my $scrutinize = "21.8";
+ # DEBUG for problematic entries
+ # my $scrutinize = "21.8";
my $first_line_node = $base_line_starts[ $line ];
my $too_far = $base_line_starts[ $line+1 ];
}
}
-# read_base: Takes a text file and a (presumed empty) graph object,
-# adds the words as simple linear nodes to the graph, and returns a
-# list of nodes that represent the beginning of lines. This graph is
-# now the starting point for application of apparatus entries in
-# merge_base, e.g. from a CSV file or a CTE file.
+=item B<read_base>
+
+my @line_beginnings = read_base( 'reference.txt', $graph );
+
+Takes a text file and a (presumed empty) graph object, adds the words
+as simple linear nodes to the graph, and returns a list of nodes that
+represent the beginning of lines. This graph is now the starting point
+for application of apparatus entries in merge_base, e.g. from a CSV
+file or a Classical Text Editor file.
+
+=cut
sub read_base {
my( $base_file, $graph ) = @_;
return( @$lineref_array );
}
+=item B<collate_variant>
+
+collate_variant( $graph, $lemma_start, $lemma_end, $var_start, $var_end );
+
+Given a lemma and a variant as start- and endpoints on the graph,
+walks through each to identify those nodes that are identical. The
+graph is a Text::Tradition::Graph object; the other arguments are
+Graph::Easy::Node objects that appear on the graph.
-## Helper methods for merge_base
+TODO: Handle collapsed and non-collapsed transpositions.
+
+=cut
sub collate_variant {
my( $graph, $lemma_start, $lemma_end, $var_start, $var_end ) = @_;
}
}
+=item B<remove_duplicate_edges>
+
+remove_duplicate_edges( $graph, $from, $to );
+
+Given two nodes, reduce the number of edges between those nodes to
+one. If neither edge represents a base text, combine their labels.
+
+=cut
+
sub remove_duplicate_edges {
my( $graph, $from, $to ) = @_;
my @edges = $from->edges_to( $to );
}
}
-# TODO need to make this configurable!
+=item B<cmp_str>
+
+Pretend you never saw this method. Really it needs to not be hardcoded.
+
+=cut
+
sub cmp_str {
my( $node ) = @_;
my $word = $node->label();
return $word;
}
+=back
+
+=head1 LICENSE
+
+This package is free software and is provided "as is" without express
+or implied warranty. You can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Tara L Andrews, aurum@cpan.org
+
+=cut
+
1;
use Text::CSV::Simple;
use Text::Tradition::Parser::BaseText qw( merge_base );
-# Takes a CSV file and a base text; returns a GraphML object.
+=head1 NAME
+
+Text::Tradition::Parser::CSV
+
+=head1 DESCRIPTION
+
+Parser module for Text::Tradition, given a list of variants as a CSV
+file and a reference text as a plaintext file with appropriate line
+breaks.
+
+=head1 METHODS
+
+=over
+
+=item B<parse>
+
+parse( $graph, 'variants.csv', 'reference.txt' );
+
+Takes an initialized Text::Tradition::Graph object and the relevant
+data files; puts the text and its variants onto the graph.
+
+=cut
sub parse {
my( $graph, $csv_file, $base_text ) = @_;
# Parse the CSV file into a list of apparatus entries.
- my @app_list = read_csv( $csv_file );
+ my @app_list = _read_csv( $csv_file );
# Now put the base text onto the graph, and merge in the
# apparatus entries.
merge_base( $graph, $base_text, @app_list );
# Takes a CSV file; returns a data structure of apparatus entries to
# be merged with a base text.
-sub read_csv {
+sub _read_csv {
my( $csv_file ) = @_;
my $parser = Text::CSV::Simple->new();
my @fields = qw/ reference text variant type context non_corr non_indep
return @app_list;
}
+=back
+
+=head1 LICENSE
+
+This package is free software and is provided "as is" without express
+or implied warranty. You can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Tara L Andrews, aurum@cpan.org
+
+=cut
+
1;
use XML::LibXML;
use XML::LibXML::XPathContext;
+=head1 NAME
-# Takes a GraphML string; returns a Graph::Easy object.
+Text::Tradition::Parser::GraphML
+
+=head1 DESCRIPTION
+
+Parser module for Text::Tradition, given a GraphML file that describes
+a collation graph. For further information on the GraphML format for
+text collation, see http://gregor.middell.net/collatex/
+
+=head1 METHODS
+
+=over
+
+=item B<parse>
+
+parse( $graph, $graphml_string );
+
+Takes an initialized Text::Tradition::Graph object and a string
+containing the GraphML; creates the appropriate nodes and edges on the
+graph.
+
+=cut
sub parse {
my( $graph, $graphml_str ) = @_;
}
+=back
+
+=head1 LICENSE
+
+This package is free software and is provided "as is" without express
+or implied warranty. You can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Tara L Andrews, aurum@cpan.org
+
+=cut
+
1;