From: Tara L Andrews Date: Tue, 26 Jul 2011 14:51:48 +0000 (+0200) Subject: dot generation works on collation output TEI, svg generation does not X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f2b9605f2bba07e0f3ed5e6fe34f36b384de6eb7;p=scpubgit%2Fstemmatology.git dot generation works on collation output TEI, svg generation does not --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 5f569df..40cc565 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -312,7 +312,7 @@ sub as_dot { next if $reading->name eq $reading->label; # TODO output readings or segments, but not both next if $reading->class eq 'node.segment'; - $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ]\n", $reading->name, $reading->label ); + $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->name, $reading->label ); } my @edges = $view eq 'relationship' ? $self->relationships : $self->paths; @@ -322,7 +322,7 @@ sub as_dot { 'label' => $edge->label, ); my $varopts = join( ', ', map { $_.'="'.$variables{$_}.'"' } sort keys %variables ); - $dot .= sprintf( "\t\"%s\" -> \"%s\" [ %s ]\n", + $dot .= sprintf( "\t\"%s\" -> \"%s\" [ %s ];\n", $edge->from->name, $edge->to->name, $varopts ); } $dot .= "}\n"; diff --git a/lib/Text/Tradition/Parser/TEI.pm b/lib/Text/Tradition/Parser/TEI.pm index 285b780..59ee42c 100644 --- a/lib/Text/Tradition/Parser/TEI.pm +++ b/lib/Text/Tradition/Parser/TEI.pm @@ -35,42 +35,67 @@ sub parse { my $parser = XML::LibXML->new(); my $doc = $parser->parse_string( $xml_str ); my $tei = $doc->documentElement(); - $xpc = XML::LibXML::XPathContext->new( $tei ); + my $xpc = XML::LibXML::XPathContext->new( $tei ); $xpc->registerNs( 'tei', 'http://www.tei-c.org/ns/1.0' ); # Then get the witnesses and create the witness objects. foreach my $wit_el ( $xpc->findnodes( '//tei:listWit/tei:witness' ) ) { my $sig = $wit_el->getAttribute( 'xml:id' ); - my $source = $wit_el->toString(); # Save all the XML info we have + my $source = $wit_el->toString(); $tradition->add_witness( sigil => $sig, source => $source ); } # Now go through the text and make the tokens. # Assume for now that each word is tokenized in the XML. my $text = {}; - map { $text->{$_->sigil} = [ $tradition->start ] } @{$tradition->witnesses}; + map { $text->{$_->sigil} = [] } @{$tradition->witnesses}; + my $word_ctr = 0; + my %used_word_ids; foreach my $word_el ( $xpc->findnodes( '//tei:w|tei:seg' ) ) { # If it is contained within a lem or a rdg, look at those witnesses. # Otherwise it is common to all witnesses. # Also common if it is the only lem/rdg within its app. # Thus we are assuming non-nested apps. - my $node_id = $word_el->getAttribute( 'xml:id' ); + my $parent_rdg = $xpc->find( 'parent::tei:lem|parent::tei:rdg', $word_el ); my @wits = get_sigla( $parent_rdg ); - @wits = map { $_->sigil } @{$tradition->witnesses} unless $wits; - - # TODO Create the node - my $reading = $word_el->textContent(); - - # TODO Figure out if it is a common node - + @wits = map { $_->sigil } @{$tradition->witnesses} unless @wits; + + # Create the node + my $reading = make_reading( $tradition->collation, $word_el ); + + # Figure out if it is a common node, that is, if it is outside an apparatus + # or the only rdg in an apparatus + my $common = 1; + if( $xpc->findnodes( 'ancestor::tei:app', $word_el ) ) { + # If we are in an app we are not a common node... + $common = 0; + if( $xpc->findnodes( 'ancestor::tei:app/tei:rdg' )->size == 1 ) { + # unless we are the only reading in the app. + $common = 1; + } + } + $reading->make_common if $common; + foreach my $sig ( @wits ) { push( @{$text->{$sig}}, $reading ); } } + $DB::single = 1; # Now we have the text paths through the witnesses, so we can make # the edges. + my $end = $tradition->collation->add_reading( '#END#' ); + foreach my $sigil ( keys %$text ) { + my @nodes = @{$text->{$sigil}}; + my $source = $tradition->collation->start; + foreach my $n ( @nodes ) { + # print STDERR sprintf( "Joining %s -> %s for wit %s\n", $source->text, $n->text, $sigil ); + $tradition->collation->add_path( $source, $n, $sigil ); + $source = $n; + } + $tradition->collation->add_path( $source, $end, $sigil ); + } # TODO think about relationships, transpositions, etc. } @@ -85,10 +110,35 @@ sub get_sigla { my @wits; if( ref( $rdg ) eq 'XML::LibXML::Element' ) { - @wits = split( /\s+/, $rdg->get_attribute( 'wit' ) ); + @wits = split( /\s+/, $rdg->getAttribute( 'wit' ) ); map { $_ =~ s/^\#// } @wits; } return @wits; } +{ + my $word_ctr = 0; + my %used_nodeids; + + sub make_reading { + my( $graph, $word_el) = @_; + my $xml_id = $word_el->getAttribute( 'xml:id' ); + if( $xml_id && exists $used_nodeids{$xml_id} ) { + warn "Already used assigned ID $xml_id"; + $xml_id = undef; + } + if( !$xml_id ) { + until( $xml_id ) { + my $try_id = 'w'.$word_ctr++; + next if exists $used_nodeids{$try_id}; + $xml_id = $try_id; + } + } + my $rdg = $graph->add_reading( $xml_id ); + $rdg->text( $word_el->textContent() ); + $used_nodeids{$xml_id} = $rdg; + return $rdg; + } +} + 1; diff --git a/lib/Text/Tradition/Witness.pm b/lib/Text/Tradition/Witness.pm index eb63bb3..161cfa4 100644 --- a/lib/Text/Tradition/Witness.pm +++ b/lib/Text/Tradition/Witness.pm @@ -57,16 +57,19 @@ sub BUILD { my $self = shift; if( $self->has_source ) { # Read the file and initialize the text. - open( WITNESS, $self->source ) or die "Could not open " - . $self->file . "for reading"; - # TODO support TEI as well as plaintext, sometime - my @words; - while() { - chomp; - push( @words, split( /\s+/, $_ ) ); - } - close WITNESS; - $self->text( \@words ); + my $rc; + eval { no warnings; $rc = open( WITNESS, $self->source ); }; + # If we didn't open a file, assume it is a string. + if( $rc ) { + my @words; + while() { + chomp; + push( @words, split( /\s+/, $_ ) ); + } + close WITNESS; + $self->text( \@words ); + } # else the text is in the source string, probably + # XML, and we are doing nothing with it. } }