From: Tara L Andrews Date: Tue, 17 May 2011 21:49:40 +0000 (+0200) Subject: remove old files, no longer used X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=35b673490219985a6effca9978d5d3353008bab5;p=scpubgit%2Fstemmatology.git remove old files, no longer used --- diff --git a/lib/Text/Tradition/Graph.pm b/lib/Text/Tradition/Graph.pm deleted file mode 100644 index 1e0f02f..0000000 --- a/lib/Text/Tradition/Graph.pm +++ /dev/null @@ -1,825 +0,0 @@ -package Text::Tradition::Graph; - -use strict; -use warnings; -use Graph::Easy; -use IPC::Run qw( run binary ); -use Module::Load; -use Text::Tradition::Graph::Position; - -=head1 NAME - -Text::Tradition::Graph - -=head1 SYNOPSIS - - use Text::Tradition::Graph; - - my $text = Text::Tradition::Graph->new( 'GraphML' => '/my/graphml/file.xml' ); - my $text = Text::Tradition::Graph->new( 'TEI' => '/my/tei/file.xml' ); - my $text = Text::Tradition::Graph->new( 'CSV' => '/my/csv/file.csv', - 'base' => '/my/basefile.txt' ); - my $text = Text::Tradition::Graph->new( 'CTE' => '/my/cte/file.txt', - 'base' => '/my/basefile.txt' ); - - my $svg_string = $text->as_svg(); - - my $lemma_nodes = $text->active_nodes(); - $text->toggle_node( 'some_word' ); - -=head1 DESCRIPTION - -A text tradition is the representation of our knowledge of a text that -has been passed down via manuscript copies from a time before printing -presses. Each text has a number of witnesses, that is, manuscripts -that bear a version of the text. The tradition is the aggregation of -these witnesses, which is to say, the collation of the text. - -This module takes a text collation and represents it as a horizontal -directed graph, suitable for SVG rendering and for analysis of various -forms. Since this module was written by a medievalist, it also -provides a facility for making a critical text reconstruction by -choosing certain variants to be 'lemma' text - that is, text which -should be considered the 'standard' reading. - -Although the graph is a very good way to render text collation, and is -visually very easy for a human to interpret, it doesn't have any -inherent information about which nodes 'go together' - that is, which -text readings appear in the same place as other readings. This module -therefore calculates 'positions' on the graph, thus holding some -information about which readings can and can't be substituted for -others. - -=head1 METHODS - -=over 4 - -=item B - -Constructor. Takes a source collation file from which to construct -the initial graph. This file can be TEI (parallel segmentation) XML, -CSV in a format yet to be documented, GraphML as documented by the -CollateX tool (L), or a Classical -Text Editor apparatus. For CSV and Classical Text Editor files, the -user must also supply a base text to which the line numbering in the -collation file refers. - -20/04/2011 Currently only CSV and GraphML are really supported. - -=cut - -sub new { - my $proto = shift; - my $class = ref( $proto ) || $proto; - my %opts = ( 'on_color' => 'yellow', - 'off_color' => 'white', - @_ ); - my $self = {}; - - # opts can be: GraphML, base+CSV, base+CTE, TEI. We need - # something to parse. - my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %opts ); - my $format = shift( @formats ); - unless( $format ) { - warn "No data given to create a graph; will initialize an empty one"; - } - if( $format =~ /^(CSV|CTE)$/ && !exists $opts{'base'} ) { - warn "Cannot make a graph from $format without a base text"; - return; - } - - # Make a graph object. - my $collation_graph = Graph::Easy->new(); - $collation_graph->set_attribute( 'node', 'shape', 'ellipse' ); - # Starting point for all texts - my $last_node = $collation_graph->add_node( '#START#' ); - - $self->{'graph'} = $collation_graph; - bless( $self, $class ); - - # Now do the parsing. - if( $format ) { - my @args; - if( $format =~ /^(CSV|CTE)$/ ) { - @args = ( 'base' => $opts{'base'}, - 'data' => $opts{$format}, - 'format' => $format ); - $format = 'BaseText'; - } else { - @args = ( $opts{ $format } ); - } - my $mod = "Text::Tradition::Parser::$format"; - load( $mod ); - $mod->can('parse')->( $self, @args ); - } - return $self; -} - -=item B - -$graph->make_positions( $common_nodes, $paths ) - -Create an associated Graph::Positions object that records the position -of each node in the graph. This method call is probably in the wrong -place and will move. - -=cut - -sub make_positions { - my( $self, $common_nodes, $paths ) = @_; - my $positions = Text::Tradition::Graph::Position->new( $common_nodes, $paths ); - $self->{'positions'} = $positions; -} - -=back - -=head2 Graph::Easy object accessor methods - -See the Graph::Easy documentation for descriptions of these functions. - -=over - -=item B - -=cut - -sub node { - my $self = shift; - return $self->{'graph'}->node( @_ ); -} - -=item B - -=cut - -sub edge { - my $self = shift; - return $self->{'graph'}->edge( @_ ); -} - -=item B - -=cut - -# Not only adds the node, but also initializes internal data -# about the node. - -sub add_node { - my $self = shift; - my $node = $self->{'graph'}->add_node( @_ ); - $self->{'identical_nodes'}->{ $node->name() } = [ $node->name() ]; - return $node; -} - -=item B - -=cut - -sub add_edge { - my $self = shift; - return $self->{'graph'}->add_edge( @_ ); -} - -=item B - -=cut - -sub del_node { - my $self = shift; - my $node = $_[0]; - - # Delete this node out of any relevant transposition pool. - if( ref $node eq 'Graph::Easy::Node' ) { - $node = $node->name(); - } - my @ident = $self->identical_nodes( $node ); - if( @ident ) { - # Get the pool. - my $pool = $self->{'identical_nodes'}->{ $ident[0] }; - foreach my $i ( 0 .. scalar(@$pool)-1 ) { - if( $pool->[$i] eq $node ) { - splice( @$pool, $i, 1 ); - last; - } - } - } - delete $self->{'identical_nodes'}->{ $node }; - - # Now delete the node. - return $self->{'graph'}->del_node( @_ ); -} - -=item B - -=cut - -sub del_edge { - my $self = shift; - return $self->{'graph'}->del_edge( @_ ); -} - -=item B - -=cut - -sub nodes { - my $self = shift; - return $self->{'graph'}->nodes( @_ ); -} - -=item B - -=cut - -sub edges { - my $self = shift; - return $self->{'graph'}->edges( @_ ); -} - -=item B - -=cut - -sub merge_nodes { - my $self = shift; - return $self->{'graph'}->merge_nodes( @_ ); -} - -### Helper methods for navigating the tree - -=back - -=head2 Graph navigation methods - -=over - -=item B - -my $node = $graph->start(); - -Returns the beginning node of the graph. - -=cut - -sub start { - # Return the beginning node of the graph. - my $self = shift; - my( $new_start ) = @_; - if( $new_start ) { - # Fix the node transposition data - delete $self->{'identical_nodes'}->{ $new_start->name() }; - $self->{'identical_nodes'}->{'#START#'} = [ '#START#' ]; - $self->{'graph'}->rename_node( $new_start, '#START#' ); - } - return $self->{'graph'}->node('#START#'); -} - -=item B - -my $next_node = $graph->next_word( $node, $path ); - -Returns the node that follows the given node along the given witness -path. TODO These are badly named. - -=cut - -sub next_word { - # Return the successor via the corresponding edge. - my $self = shift; - return $self->_find_linked_word( 'next', @_ ); -} - -=item B - -my $prior_node = $graph->prior_word( $node, $path ); - -Returns the node that precedes the given node along the given witness -path. TODO These are badly named. - -=cut - -sub prior_word { - # Return the predecessor via the corresponding edge. - my $self = shift; - return $self->_find_linked_word( 'prior', @_ ); -} - -sub _find_linked_word { - my( $self, $direction, $node, $edge ) = @_; - $edge = 'base text' unless $edge; - my @linked_edges = $direction eq 'next' - ? $node->outgoing() : $node->incoming(); - return undef unless scalar( @linked_edges ); - - # We have to find the linked edge that contains all of the - # witnesses supplied in $edge. - my @edge_wits = split( /, /, $edge ); - foreach my $le ( @linked_edges ) { - my @le_wits = split( /, /, $le->name() ); - if( _is_within( \@edge_wits, \@le_wits ) ) { - # This is the right edge. - return $direction eq 'next' ? $le->to() : $le->from(); - } - } - warn "Could not find $direction node from " . $node->label - . " along edge $edge"; - return undef; -} - -# Some set logic. -sub _is_within { - my( $set1, $set2 ) = @_; - my $ret = 1; - foreach my $el ( @$set1 ) { - $ret = 0 unless grep { /^\Q$el\E$/ } @$set2; - } - return $ret; -} - -=item B - -my @nodes = $graph->node_sequence( $first, $last, $path ); - -Returns the ordered list of nodes, starting with $first and ending -with $last, along the given witness path. - -=cut - -sub node_sequence { - my( $self, $start, $end, $witness, $backup ) = @_; - unless( ref( $start ) eq 'Graph::Easy::Node' - && ref( $end ) eq 'Graph::Easy::Node' ) { - warn "Called node_sequence without two nodes!"; - return (); - } - $witness = 'base text' unless $witness; - my @nodes = ( $start ); - my %seen; - my $n = $start; - while( $n ne $end ) { - if( exists( $seen{$n->name()} ) ) { - warn "Detected loop at " . $n->name(); - last; - } - $seen{$n->name()} = 1; - - my @edges = $n->outgoing(); - my @relevant_edges = grep { my @w = split( /, /, $_->label ); - grep { /^\Q$witness\E$/ } @w } @edges; - unless( @relevant_edges ) { - @relevant_edges = grep { my @w = split( /, /, $_->label ); - grep { /^\Q$backup\E$/ } @w } @edges - if $backup; - } - unless( @relevant_edges ) { - @relevant_edges = grep { $_->label() eq 'base text' } @edges; - } - - warn "Did not find an edge for $witness from node " . $n->label - unless scalar @relevant_edges; - my $next = $relevant_edges[0]->to(); - push( @nodes, $next ); - $n = $next; - } - # Check that the last node is our end node. - my $last = $nodes[$#nodes]; - warn "Last node found from " . $start->label() . - " for witness $witness is not the end!" - unless $last eq $end; - - return @nodes; -} - -=item B - -my $text = $graph->string_lemma( $first, $last, $path ); - -Returns the whitespace-separated text, starting with $first and ending -with $last, represented in the graph along the given path. - -=cut - -sub string_lemma { - my( $self, $start, $end, $label ) = @_; - - my @nodes = $self->node_sequence( $start, $end, $label ); - my @words = map { $_->label() } @nodes; - return join( ' ', @words ); -} - -=back - -=head2 Transposition handling methods - -These should really move to their own module. For use when the graph -has split transposed nodes in order to avoid edges that travel -backward. - -=over - -=item B - -$graph->set_identical_node( $node, $other_node ) - -Tell the graph that these two nodes contain the same (transposed) reading. - -=cut - -sub set_identical_node { - my( $self, $node, $main_node ) = @_; - - # The identical_nodes hash contains a key per node, and a value - # that is an arrayref to a list of nodes. Those nodes that are - # the same (transposed) node should be keys that point to the same - # arrayref. Each arrayref should contain the name of each node - # that points to it. So basically here we want to merge the - # arrays for the two nodes that are now identical. The 'main' - # node should always be first in the array. - - my $pool = $self->{'identical_nodes'}->{ $node }; - my $main_pool = $self->{'identical_nodes'}->{ $main_node }; - - my %poolhash; - foreach ( @$main_pool ) { - # Note which nodes are already in the main pool so that we - # don't re-add them. - $poolhash{$_} = 1; - } - - foreach( @$pool ) { - # Add the remaining nodes to the main pool... - push( @$main_pool, $_ ) unless $poolhash{$_}; - } - # ...and set this node to point to the enlarged pool. - $self->{'identical_nodes'}->{ $node } = $main_pool; -} - -=item B - -my @nodes = $graph->identical_nodes( $node ) - -Get a list of nodes that contain the same (transposed) reading as the -given node. - -=cut - -sub identical_nodes { - my( $self, $node ) = @_; - my @others = grep { $_ !~ /^$node$/ } - @{$self->{'identical_nodes'}->{ $node }}; - return @others; -} - -=back - -=head2 Output method(s) - -=over - -=item B - -print $graph->as_svg( $recalculate ); - -Returns an SVG string that represents the graph. Uses GraphViz to do -this, because Graph::Easy doesn't cope well with long graphs. Unless -$recalculate is passed (and is a true value), the method will return a -cached copy of the SVG after the first call to the method. - -=cut - -sub as_svg { - my( $self, $recalc ) = @_; - return $self->{'svg'} if( exists $self->{'svg'} && !$recalc ); - - $self->{'graphviz'} = $self->{'graph'}->as_graphviz() - unless( exists $self->{'graphviz'} && !$recalc ); - - my @cmd = qw/dot -Tsvg/; - my( $svg, $err ); - my $in = $self->{'graphviz'}; - run( \@cmd, \$in, ">", binary(), \$svg ); - $self->{'svg'} = $svg; - return $svg; -} - -=item B - -print $graph->as_graphml( $recalculate ) - -Returns a GraphML representation of the collation graph, with -transposition information and position information. Unless -$recalculate is passed (and is a true value), the method will return a -cached copy of the SVG after the first call to the method. - -=cut - -sub as_graphml { - my( $self, $recalc ) = @_; - return $self->{'graphml'} if( exists $self->{'graphml'} && !$recalc ); - - # Some namespaces - my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns'; - my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance'; - my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' . - 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd'; - - # Create the document and root node - my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" ); - my $root = $graphml->createElementNS( $graphml_ns, 'graphml' ); - $graphml->setDocumentElement( $root ); - $root->setNamespace( $xsi_ns, 'xsi', 0 ); - $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema ); - - # Add the data keys for nodes - my @node_data = ( 'name', 'token', 'identical', 'position' ); - foreach my $ndi ( 0 .. $#node_data ) { - my $key = $root->addNewChild( $graphml_ns, 'key' ); - $key->setAttribute( 'attr.name', $node_data[$ndi] ); - $key->setAttribute( 'attr.type', 'string' ); - $key->setAttribute( 'for', 'node' ); - $key->setAttribute( 'id', 'd'.$ndi ); - } - - # Add the data keys for edges - my %wit_hash; - my $wit_ctr = 0; - foreach my $wit ( $self->getWitnessList ) { - my $wit_key = 'w' . $wit_ctr++; - $wit_hash{$wit} = $wit_key; - my $key = $root->addNewChild( $graphml_ns, 'key' ); - $key->setAttribute( 'attr.name', $wit ); - $key->setAttribute( 'attr.type', 'string' ); - $key->setAttribute( 'for', 'edge' ); - $key->setAttribute( 'id', $wit_key ); - } - - # Add the graph, its nodes, and its edges - my $graph = $root->addNewChild( $graphml_ns, 'graph' ); - $graph->setAttribute( 'edgedefault', 'directed' ); - $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful - $graph->setAttribute( 'parse.edgeids', 'canonical' ); - $graph->setAttribute( 'parse.edges', $self->edges() ); - $graph->setAttribute( 'parse.nodeids', 'canonical' ); - $graph->setAttribute( 'parse.nodes', $self->nodes() ); - $graph->setAttribute( 'parse.order', 'nodesfirst' ); - - my $node_ctr = 0; - my %node_hash; - foreach my $n ( $self->nodes() ) { - my %this_node_data = (); - foreach my $ndi ( 0 .. $#node_data ) { - my $value; - $this_node_data{'d'.$ndi} = $n->name if $node_data[$ndi] eq 'name'; - $this_node_data{'d'.$ndi} = $n->label - if $node_data[$ndi] eq 'token'; - $this_node_data{'d'.$ndi} = $self->primary_node( $n ) - if $node_data[$ndi] eq 'name'; - $this_node_data{'d'.$ndi} = - $self->{'positions'}->node_position( $n ) - if $node_data[$ndi] eq 'position'; - } - my $node_el = $graph->addNewChild( $graphml_ns, 'node' ); - my $node_xmlid = 'n' . $node_ctr++; - $node_hash{ $n->name } = $node_xmlid; - $node_el->setAttribute( 'id', $node_xmlid ); - - foreach my $dk ( keys %this_node_data ) { - my $d_el = $node_el->addNewChild( $graphml_ns, 'data' ); - $d_el->setAttribute( 'key', $dk ); - $d_el->appendTextChild( $this_node_data{$dk} ); - } - } - - foreach my $e ( $self->edges() ) { - my( $name, $from, $to ) = ( $e->name, - $node_hash{ $e->from()->name() }, - $node_hash{ $e->to()->name() } ); - my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' ); - $edge_el->setAttribute( 'source', $from ); - $edge_el->setAttribute( 'target', $to ); - $edge_el->setAttribute( 'id', $name ); - # TODO Got to add the witnesses - } - - # Return the thing - $self->{'graphml'} = $graphml; - return $graphml; -} - -=back - -=head2 Lemmatization methods - -=over - -=item B - -=cut - -sub init_lemmatizer { - my $self = shift; - # Initialize the 'lemma' hash, going through all the nodes and seeing - # which ones are common nodes. This should only be run once. - - return if( $self->{'lemmatizer_initialized'} ); - my @active_names = map { $_->name } grep { $self->is_common( $_ ) } - $self->nodes(); - $self->{'positions'}->init_lemmatizer( @active_names ); - $self->{'lemmatizer_initialized'} = 1; - -} - -=item B - -my @nodes_turned_off = $graph->toggle_node( $node ); - -Takes a node name, and either lemmatizes or de-lemmatizes it. Returns -a list of all nodes that are de-lemmatized as a result of the toggle. - -=cut - -sub toggle_node { - my( $self, $node ) = @_; - - # In case this is being called for the first time. - $self->init_lemmatizer(); - - if( !$node || $self->is_common( $node ) ) { - # Do nothing, it's a common node. - return; - } - - my $pos = $self->{'positions'}->node_position( $node ); - my $old_state = $self->{'positions'}->state( $pos ); - my @nodes_off; - if( $old_state && $old_state eq $node ) { - # Turn off the node. We turn on no others by default. - push( @nodes_off, $node ); - } else { - # Turn on the node. - $self->{'positions'}->set_state( $pos, $node ); - # Any other 'on' nodes in the same position should be off. - push( @nodes_off, $self->colocated_nodes( $node ) ); - # Any node that is an identical transposed one should be off. - push( @nodes_off, $self->identical_nodes( $node ) ) - if $self->identical_nodes( $node ); - } - @nodes_off = unique_list( @nodes_off ); - - # Turn off the nodes that need to be turned off. - my @nodes_turned_off; - foreach my $n ( @nodes_off ) { - my $npos = $self->{'positions'}->node_position( $n ); - my $state = $self->{'positions'}->state( $npos ); - if( $state && $state eq $n ) { - # this node is still on - push( @nodes_turned_off, $n ); - my $new_state = undef; - if( $n eq $node ) { - # This is the node that was clicked, so if there are no - # other nodes there, turn off the position. In all other - # cases, restore the ellipsis. - my @all_n = $self->{'positions'}->nodes_at_position( $pos ); - $new_state = 0 if scalar( @all_n ) == 1; - } - $self->{'positions'}->set_state( $npos, $new_state ); - } elsif( $old_state && $old_state eq $n ) { - # another node has already been turned on here - push( @nodes_turned_off, $n ); - } # else some other node was on anyway, so pass. - } - return @nodes_turned_off; -} - -=item B - -my @state = $graph->active_nodes( @nodes_turned_off ); - -Takes a list of nodes that have just been turned off, and returns a -set of tuples of the form ['node', 'state'] that indicates what -changes need to be made to the graph. - -=over - -=item * - -A state of 1 means 'turn on this node' - -=item * - -A state of 0 means 'turn off this node' - -=item * - -A state of undef means 'an ellipsis belongs in the text here because -no decision has been made' - -=back - -=cut - -sub active_nodes { - my( $self, @toggled_off_nodes ) = @_; - - # In case this is the first run - $self->init_lemmatizer(); - # First get the positions of those nodes which have been - # toggled off. - my $positions_off = {}; - map { $positions_off->{ $self->{'positions'}->node_position( $_ ) } = $_ } - @toggled_off_nodes; - - - # Now for each position, we have to see if a node is on, and we - # have to see if a node has been turned off. - my @answer; - foreach my $pos ( $self->{'positions'}->all() ) { - # Find the state of this position. If there is an active node, - # its name will be the state; otherwise the state will be 0 - # (nothing at this position) or undef (ellipsis at this position) - my $active = $self->{'positions'}->state( $pos ); - - # Is there a formerly active node that was toggled off? - if( exists( $positions_off->{$pos} ) ) { - my $off_node = $positions_off->{$pos}; - if( $active && $active ne $off_node) { - push( @answer, [ $off_node, 0 ], [ $active, 1 ] ); - } else { - push( @answer, [ $off_node, $active ] ); - } - - # No formerly active node, so we just see if there is a currently - # active one. - } elsif( $active ) { - # Push the active node, whatever it is. - push( @answer, [ $active, 1 ] ); - } else { - # Push the state that is there. Arbitrarily use the first node - # at that position. - my @pos_nodes = $self->{'positions'}->nodes_at_position( $pos ); - push( @answer, - [ $pos_nodes[0], $self->{'positions'}->state( $pos ) ] ); - } - } - - return @answer; -} - -# A couple of helpers. - -sub is_common { - my( $self, $node ) = @_; - $node = $self->_nodeobj( $node ); - return $node->get_attribute('class') eq 'common'; -} - -sub _nodeobj { - my( $self, $node ) = @_; - unless( ref $node eq 'Graph::Easy::Node' ) { - $node = $self->node( $node ); - } - return $node; -} - -sub colocated_nodes { - my $self = shift; - return $self->{'positions'}->colocated_nodes( @_ ); -} - -sub text_of_node { - my( $self, $node_id ) = @_; - # This is the label of the given node. - return $self->node( $node_id )->label(); -} - -sub text_for_witness { - my( $self, $wit ) = @_; - - my @nodes = $self->{'positions'}->witness_path( $wit ); - my @words = map { $self->node( $_ )->label() } @nodes; - return join( ' ', @words ); -} - -sub unique_list { - my( @list ) = @_; - my %h; - map { $h{$_} = 1 } @list; - return keys( %h ); -} - -=back - -=head1 LICENSE - -This package is free software and is provided "as is" without express -or implied warranty. You can redistribute it and/or modify it under -the same terms as Perl itself. - -=head1 AUTHOR - -Tara L Andrews, aurum@cpan.org - -=cut - -1; - diff --git a/lib/Text/Tradition/Graph/Position.pm b/lib/Text/Tradition/Graph/Position.pm deleted file mode 100644 index d9f023d..0000000 --- a/lib/Text/Tradition/Graph/Position.pm +++ /dev/null @@ -1,332 +0,0 @@ -package Text::Tradition::Graph::Position; - -use strict; -use warnings; - -=head1 NAME - -Text::Tradition::Graph::Position - -=head1 SUMMARY - -An object to go with a text graph that keeps track of relative -positions of the nodes on that graph. This is useful for keeping -track of which readings are variants of each other, which is expensive -to calculate every time from the graph itself. - -=head1 METHODS - -=over 4 - -=item B - -Takes two arguments: a list of names of common nodes in the graph, and -a list of witness paths. Calculates position identifiers for each -node based on this. - -=cut - -# TODO Why not just hand over the graph and calculate the common nodes -# and witness paths here? -sub new { - my $proto = shift; - my( $common_nodes, $witness_paths ) = @_; - - my $self = {}; - - # We have to calculate the position identifiers for each word, - # keyed on the common nodes. This will be 'fun'. The end result - # is a hash per witness, whose key is the word node and whose - # value is its position in the text. Common nodes are always N,1 - # so have identical positions in each text. - - my $node_pos = {}; - foreach my $wit ( keys %$witness_paths ) { - # First we walk each path, making a matrix for each witness that - # corresponds to its eventual position identifier. Common nodes - # always start a new row, and are thus always in the first column. - - my $wit_matrix = []; - my $cn = 0; # We should hit the common nodes in order. - my $row = []; - foreach my $wn ( @{$witness_paths->{$wit}} ) { # $wn is a node name - if( $wn eq $common_nodes->[$cn] ) { - # Set up to look for the next common node, and - # start a new row of words. - $cn++; - push( @$wit_matrix, $row ) if scalar( @$row ); - $row = []; - } - push( @$row, $wn ); - } - push( @$wit_matrix, $row ); # Push the last row onto the matrix - - # Now we have a matrix per witness, so that each row in the - # matrix begins with a common node, and continues with all the - # variant words that appear in the witness. We turn this into - # real positions in row,cell format. But we need some - # trickery in order to make sure that each node gets assigned - # to only one position. - - foreach my $li ( 1..scalar(@$wit_matrix) ) { - foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) { - my $node = $wit_matrix->[$li-1]->[$di-1]; - my $position = "$li,$di"; - # If we have seen this node before, we need to compare - # its position with what went before. - unless( exists $node_pos->{ $node } && - _cmp_position( $position, $node_pos->{ $node }) < 1 ) { - # The new position ID replaces the old one. - $node_pos->{$node} = $position; - } # otherwise, the old position needs to stay. - } - } - } - - # Now we have a hash of node positions keyed on node. - $self->{'node_positions'} = $node_pos; - # We should also save our witness paths, as long as we have them. - # Right now each path is a list of nodes; we may want to make it - # a list of position refs. - $self->{'witness_paths'} = $witness_paths; - - # We are also going to want to keep track of whether a position has - # been explicitly emptied, for our lemmatization. - my $position_state = {}; - map { $position_state->{ $_ } = undef } values %$node_pos; - $self->{'position_state'} = $position_state; - - - bless( $self, $proto ); - return $self; -} - -=item B - -my $pos = $positions->node_position( $node ); - -Returns the position identifier for a given node in the graph. - -=cut - -sub node_position { - my( $self, $node ) = @_; - $node = _name( $node ); - - unless( exists( $self->{'node_positions'}->{ $node } ) ) { - warn "No node with name $node known to the graph"; - return; - } - - return $self->{'node_positions'}->{ $node }; -} - -=item B - -my @nodes = $positions->nodes_at_position( $pos ); - -Returns the names of all the nodes in the graph at a given position. - -=cut - -sub nodes_at_position { - my( $self, $pos ) = @_; - - my $positions = $self->_calc_positions(); - unless( exists $positions->{ $pos } ) { - warn "No position $pos in the graph"; - return; - } - return @{ $positions->{ $pos }}; -} - -=item B - -my @nodes = $positions->colocated_nodes( $node ); - -Returns the names of all the nodes in the graph at the same position -as the node given, apart from that node itself. - -=cut - -sub colocated_nodes { - my( $self, $node ) = @_; - $node = _name( $node ); - my $pos = $self->node_position( $node ); - my @loc_nodes = $self->nodes_at_position( $pos ); - - my @cn = grep { $_ !~ /^$node$/ } @loc_nodes; - return @cn; -} - -=item B - -my @position_list = $positions->all() - -Returns an ordered list of positions in the graph. - -=cut - -sub all { - my( $self ) = @_; - my $pos = $self->_calc_positions; - return sort by_position keys( %$pos ); -} - -sub witness_path { - my( $self, $wit ) = @_; - return @{$self->{'witness_paths'}->{ $wit }}; -} - -=back - -=head2 Lemmatization functions - -For some traditions, each position will have at least one node that is -the 'lemma text', that is, the text that an editor has chosen to stand -as authoritative for the tradition. The following methods keep -track of what lemma, if any, should stand at each position. - -=over - -=item B - -$positions->init_lemmatizer( @nodelist ) - -Sets up the necessary logic for keeping track of lemmas. It should be -called once, with the initial list of lemmas. - -=cut - -# TODO We can initialize this without the argument, based on the passed -# list of common nodes. -sub init_lemmatizer { - my( $self, @nodes ) = @_; - foreach my $n ( @nodes ) { - $self->set_state( $self->node_position( $n ), $n ); - } -} - -=item B - -my $answer = $positions->state( $position_id ) - -For the given position ID, returns the node (if any) that stands at -the lemma. If no node should stand as lemma at this position, returns -0; if no decision has been made for this position, returns undef. - -=cut - -sub state { - my( $self, $pos ) = @_; - return $self->{'position_state'}->{ $pos }; -} - -=item B - -$positions->set_state( $position_id, $state ) - -For the given position ID, sets the lemma (if any). State can be the -name of a node, 0 (for cases when no lemma should stand), or undef -(for cases when no decision has been made). - -=cut - -sub set_state { - my( $self, $pos, $state ) = @_; - $self->{'position_state'}->{ $pos } = $state; -} - -=back - -=head2 Comparison function - -=over - -=item B - -my @nodelist = sort $positions->by_position @nodelist; - -For use in the 'sort' function. Returns a comparison value based on -the position of the given nodes. - -=cut - -# Compares two nodes according to their positions in the witness -# index hash. -sub by_position { - my $self = shift; - return _cmp_position( $a, $b ); -} - -# Takes two position strings (X,Y) and sorts them. -sub _cmp_position { - my( $a, $b ) = @_; - my @pos_a = split(/,/, $a ); - my @pos_b = split(/,/, $b ); - - my $big_cmp = $pos_a[0] <=> $pos_b[0]; - return $big_cmp if $big_cmp; - # else - return $pos_a[1] <=> $pos_b[1]; -} - - -#### HELPER FUNCTIONS #### - -# At some point I may find myself using scalar references for the node -# positions, in order to keep them easily in sync. Just in case, I will -# calculate this every time I need it. -sub _calc_positions { - my $self = shift; - return _invert_hash( $self->{'node_positions'} ) -} - -# Helper for dealing with node refs -sub _name { - my( $node ) = @_; - # We work with node names in this library - if( ref( $node ) && ref( $node ) eq 'Graph::Easy::Node' ) { - $node = $node->name(); - } - return $node; -} - -# Useful helper. Will be especially useful if I find myself using -# scalar references for the positions after all - it can dereference -# them here. -sub _invert_hash { - my ( $hash, $plaintext_keys ) = @_; - my %new_hash; - foreach my $key ( keys %$hash ) { - my $val = $hash->{$key}; - my $valkey = $val; - if( $plaintext_keys - && ref( $val ) ) { - $valkey = $plaintext_keys->{ scalar( $val ) }; - warn( "No plaintext value given for $val" ) unless $valkey; - } - if( exists ( $new_hash{$valkey} ) ) { - push( @{$new_hash{$valkey}}, $key ); - } else { - $new_hash{$valkey} = [ $key ]; - } - } - return \%new_hash; -} - -=back - -=head1 LICENSE - -This package is free software and is provided "as is" without express -or implied warranty. You can redistribute it and/or modify it under -the same terms as Perl itself. - -=head1 AUTHOR - -Tara L Andrews, aurum@cpan.org - -=cut - -1; diff --git a/lib/Text/Tradition/Schema.pm b/lib/Text/Tradition/Schema.pm deleted file mode 100644 index d6161d7..0000000 --- a/lib/Text/Tradition/Schema.pm +++ /dev/null @@ -1,20 +0,0 @@ -package Traditions::Schema; - -# Created by DBIx::Class::Schema::Loader -# DO NOT MODIFY THE FIRST PART OF THIS FILE - -use strict; -use warnings; - -use base 'DBIx::Class::Schema'; - -__PACKAGE__->load_namespaces; - - -# Created by DBIx::Class::Schema::Loader v0.07002 @ 2010-10-19 17:34:43 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:iZNRQ5HMUi7/5s+iC1WPmg - -my $database = '/home/tla/stemmatology/db/traditions.db'; -__PACKAGE__->connection( "dbi:SQLite:dbname=$database" ); - -1; diff --git a/lib/Text/Tradition/Schema/Result/Collation.pm b/lib/Text/Tradition/Schema/Result/Collation.pm deleted file mode 100644 index d1dcaa6..0000000 --- a/lib/Text/Tradition/Schema/Result/Collation.pm +++ /dev/null @@ -1,89 +0,0 @@ -package Traditions::Schema::Result::Collation; - -# Created by DBIx::Class::Schema::Loader -# DO NOT MODIFY THE FIRST PART OF THIS FILE - -use strict; -use warnings; - -use base 'DBIx::Class::Core'; - - -=head1 NAME - -Traditions::Schema::Result::Collation - -=cut - -__PACKAGE__->table("collations"); - -=head1 ACCESSORS - -=head2 collationid - - data_type: 'integer' - is_auto_increment: 1 - is_nullable: 0 - -=head2 text - - data_type: 'integer' - is_foreign_key: 1 - is_nullable: 0 - -=head2 tag - - data_type: 'text' - is_nullable: 1 - -=cut - -__PACKAGE__->add_columns( - "collationid", - { data_type => "integer", is_auto_increment => 1, is_nullable => 0 }, - "text", - { data_type => "integer", is_foreign_key => 1, is_nullable => 0 }, - "tag", - { data_type => "text", is_nullable => 1 }, -); -__PACKAGE__->set_primary_key("collationid"); - -=head1 RELATIONS - -=head2 readings - -Type: has_many - -Related object: L - -=cut - -__PACKAGE__->has_many( - "readings", - "Traditions::Schema::Result::Reading", - { "foreign.collation" => "self.collationid" }, - { cascade_copy => 0, cascade_delete => 0 }, -); - -=head2 text - -Type: belongs_to - -Related object: L - -=cut - -__PACKAGE__->belongs_to( - "text", - "Traditions::Schema::Result::Text", - { textid => "text" }, - { is_deferrable => 1, on_delete => "CASCADE", on_update => "CASCADE" }, -); - - -# Created by DBIx::Class::Schema::Loader v0.07002 @ 2010-10-19 17:34:43 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:+l31v5NCx//HtluCu+1JeQ - - -# You can replace this text with custom content, and it will be preserved on regeneration -1; diff --git a/lib/Text/Tradition/Schema/Result/Manuscript.pm b/lib/Text/Tradition/Schema/Result/Manuscript.pm deleted file mode 100644 index a5aaec1..0000000 --- a/lib/Text/Tradition/Schema/Result/Manuscript.pm +++ /dev/null @@ -1,124 +0,0 @@ -package Traditions::Schema::Result::Manuscript; - -# Created by DBIx::Class::Schema::Loader -# DO NOT MODIFY THE FIRST PART OF THIS FILE - -use strict; -use warnings; - -use base 'DBIx::Class::Core'; - - -=head1 NAME - -Traditions::Schema::Result::Manuscript - -=cut - -__PACKAGE__->table("manuscripts"); - -=head1 ACCESSORS - -=head2 manuscriptid - - data_type: 'integer' - is_auto_increment: 1 - is_nullable: 0 - -=head2 text - - data_type: 'integer' - is_foreign_key: 1 - is_nullable: 0 - -=head2 description - - data_type: 'text' - is_nullable: 0 - -=head2 sigil - - data_type: 'text' - is_nullable: 0 - -=head2 first_word - - data_type: 'integer' - is_foreign_key: 1 - is_nullable: 1 - -=cut - -__PACKAGE__->add_columns( - "manuscriptid", - { data_type => "integer", is_auto_increment => 1, is_nullable => 0 }, - "text", - { data_type => "integer", is_foreign_key => 1, is_nullable => 0 }, - "description", - { data_type => "text", is_nullable => 0 }, - "sigil", - { data_type => "text", is_nullable => 0 }, - "first_word", - { data_type => "integer", is_foreign_key => 1, is_nullable => 1 }, -); -__PACKAGE__->set_primary_key("manuscriptid"); - -=head1 RELATIONS - -=head2 first_word - -Type: belongs_to - -Related object: L - -=cut - -__PACKAGE__->belongs_to( - "first_word", - "Traditions::Schema::Result::Reading", - { readingid => "first_word" }, - { - is_deferrable => 1, - join_type => "LEFT", - on_delete => "CASCADE", - on_update => "CASCADE", - }, -); - -=head2 text - -Type: belongs_to - -Related object: L - -=cut - -__PACKAGE__->belongs_to( - "text", - "Traditions::Schema::Result::Text", - { textid => "text" }, - { is_deferrable => 1, on_delete => "CASCADE", on_update => "CASCADE" }, -); - -=head2 readings - -Type: has_many - -Related object: L - -=cut - -__PACKAGE__->has_many( - "readings", - "Traditions::Schema::Result::Reading", - { "foreign.manuscript" => "self.manuscriptid" }, - { cascade_copy => 0, cascade_delete => 0 }, -); - - -# Created by DBIx::Class::Schema::Loader v0.07002 @ 2010-10-19 17:34:43 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:71XIOYGQBGADyQrj4WE49Q - - -# You can replace this text with custom content, and it will be preserved on regeneration -1; diff --git a/lib/Text/Tradition/Schema/Result/Reading.pm b/lib/Text/Tradition/Schema/Result/Reading.pm deleted file mode 100644 index e336406..0000000 --- a/lib/Text/Tradition/Schema/Result/Reading.pm +++ /dev/null @@ -1,212 +0,0 @@ -package Traditions::Schema::Result::Reading; - -# Created by DBIx::Class::Schema::Loader -# DO NOT MODIFY THE FIRST PART OF THIS FILE - -use strict; -use warnings; - -use base 'DBIx::Class::Core'; - - -=head1 NAME - -Traditions::Schema::Result::Reading - -=cut - -__PACKAGE__->table("readings"); - -=head1 ACCESSORS - -=head2 readingid - - data_type: 'integer' - is_auto_increment: 1 - is_nullable: 0 - -=head2 prior_reading - - data_type: 'integer' - is_foreign_key: 1 - is_nullable: 1 - -=head2 next_reading - - data_type: 'integer' - is_foreign_key: 1 - is_nullable: 1 - -=head2 readingtext - - data_type: 'text' - is_nullable: 0 - -=head2 ante_corr - - data_type: 'text' - is_nullable: 1 - -=head2 manuscript - - data_type: 'integer' - is_foreign_key: 1 - is_nullable: 0 - -=head2 collation - - data_type: 'integer' - is_foreign_key: 1 - is_nullable: 1 - -=cut - -__PACKAGE__->add_columns( - "readingid", - { data_type => "integer", is_auto_increment => 1, is_nullable => 0 }, - "prior_reading", - { data_type => "integer", is_foreign_key => 1, is_nullable => 1 }, - "next_reading", - { data_type => "integer", is_foreign_key => 1, is_nullable => 1 }, - "readingtext", - { data_type => "text", is_nullable => 0 }, - "ante_corr", - { data_type => "text", is_nullable => 1 }, - "manuscript", - { data_type => "integer", is_foreign_key => 1, is_nullable => 0 }, - "collation", - { data_type => "integer", is_foreign_key => 1, is_nullable => 1 }, -); -__PACKAGE__->set_primary_key("readingid"); -__PACKAGE__->add_unique_constraint("next_reading_unique", ["next_reading"]); -__PACKAGE__->add_unique_constraint("prior_reading_unique", ["prior_reading"]); - -=head1 RELATIONS - -=head2 manuscripts - -Type: has_many - -Related object: L - -=cut - -__PACKAGE__->has_many( - "manuscripts", - "Traditions::Schema::Result::Manuscript", - { "foreign.first_word" => "self.readingid" }, - { cascade_copy => 0, cascade_delete => 0 }, -); - -=head2 collation - -Type: belongs_to - -Related object: L - -=cut - -__PACKAGE__->belongs_to( - "collation", - "Traditions::Schema::Result::Collation", - { collationid => "collation" }, - { - is_deferrable => 1, - join_type => "LEFT", - on_delete => "CASCADE", - on_update => "CASCADE", - }, -); - -=head2 manuscript - -Type: belongs_to - -Related object: L - -=cut - -__PACKAGE__->belongs_to( - "manuscript", - "Traditions::Schema::Result::Manuscript", - { manuscriptid => "manuscript" }, - { is_deferrable => 1, on_delete => "CASCADE", on_update => "CASCADE" }, -); - -=head2 next_reading - -Type: belongs_to - -Related object: L - -=cut - -__PACKAGE__->belongs_to( - "next_reading", - "Traditions::Schema::Result::Reading", - { readingid => "next_reading" }, - { - is_deferrable => 1, - join_type => "LEFT", - on_delete => "CASCADE", - on_update => "CASCADE", - }, -); - -=head2 reading_next_reading - -Type: might_have - -Related object: L - -=cut - -__PACKAGE__->might_have( - "reading_next_reading", - "Traditions::Schema::Result::Reading", - { "foreign.next_reading" => "self.readingid" }, - { cascade_copy => 0, cascade_delete => 0 }, -); - -=head2 prior_reading - -Type: belongs_to - -Related object: L - -=cut - -__PACKAGE__->belongs_to( - "prior_reading", - "Traditions::Schema::Result::Reading", - { readingid => "prior_reading" }, - { - is_deferrable => 1, - join_type => "LEFT", - on_delete => "CASCADE", - on_update => "CASCADE", - }, -); - -=head2 reading_prior_reading - -Type: might_have - -Related object: L - -=cut - -__PACKAGE__->might_have( - "reading_prior_reading", - "Traditions::Schema::Result::Reading", - { "foreign.prior_reading" => "self.readingid" }, - { cascade_copy => 0, cascade_delete => 0 }, -); - - -# Created by DBIx::Class::Schema::Loader v0.07002 @ 2010-10-19 17:34:43 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:ze+1/h74nB4r9fc6AGIIkQ - - -# You can replace this text with custom content, and it will be preserved on regeneration -1; diff --git a/lib/Text/Tradition/Schema/Result/Text.pm b/lib/Text/Tradition/Schema/Result/Text.pm deleted file mode 100644 index 80712e2..0000000 --- a/lib/Text/Tradition/Schema/Result/Text.pm +++ /dev/null @@ -1,88 +0,0 @@ -package Traditions::Schema::Result::Text; - -# Created by DBIx::Class::Schema::Loader -# DO NOT MODIFY THE FIRST PART OF THIS FILE - -use strict; -use warnings; - -use base 'DBIx::Class::Core'; - - -=head1 NAME - -Traditions::Schema::Result::Text - -=cut - -__PACKAGE__->table("texts"); - -=head1 ACCESSORS - -=head2 textid - - data_type: 'integer' - is_auto_increment: 1 - is_nullable: 0 - -=head2 provenance - - data_type: 'text' - is_nullable: 0 - -=head2 description - - data_type: 'text' - is_nullable: 1 - -=cut - -__PACKAGE__->add_columns( - "textid", - { data_type => "integer", is_auto_increment => 1, is_nullable => 0 }, - "provenance", - { data_type => "text", is_nullable => 0 }, - "description", - { data_type => "text", is_nullable => 1 }, -); -__PACKAGE__->set_primary_key("textid"); - -=head1 RELATIONS - -=head2 manuscripts - -Type: has_many - -Related object: L - -=cut - -__PACKAGE__->has_many( - "manuscripts", - "Traditions::Schema::Result::Manuscript", - { "foreign.text" => "self.textid" }, - { cascade_copy => 0, cascade_delete => 0 }, -); - -=head2 collations - -Type: has_many - -Related object: L - -=cut - -__PACKAGE__->has_many( - "collations", - "Traditions::Schema::Result::Collation", - { "foreign.text" => "self.textid" }, - { cascade_copy => 0, cascade_delete => 0 }, -); - - -# Created by DBIx::Class::Schema::Loader v0.07002 @ 2010-10-19 17:34:43 -# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:nRxu7u/rg6k397lkxT3IWQ - - -# You can replace this text with custom content, and it will be preserved on regeneration -1;