From: Tara L Andrews Date: Fri, 6 Jan 2012 15:38:45 +0000 (+0100) Subject: fix up CTE parser, including an ugly hack I need, with new graph X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=127201444b501c247b995702f092c943f303980c;p=scpubgit%2Fstemmatology.git fix up CTE parser, including an ugly hack I need, with new graph --- diff --git a/TreeOfTexts/lib/TreeOfTexts/Controller/Root.pm b/TreeOfTexts/lib/TreeOfTexts/Controller/Root.pm index 9707dfa..3141c0b 100644 --- a/TreeOfTexts/lib/TreeOfTexts/Controller/Root.pm +++ b/TreeOfTexts/lib/TreeOfTexts/Controller/Root.pm @@ -24,7 +24,8 @@ TreeOfTexts::Controller::Root - Root Controller for TreeOfTexts =head2 index -The root page (/) +The root page (/). Lists the traditions available in the DB to work on, +and should also eventually have an 'Upload new' interface. =cut @@ -33,11 +34,10 @@ sub index :Path :Args(0) { my $m = $c->model('Directory'); my @all_texts; - foreach my $id ( $m->traditions ) { + foreach my $id ( $m->tradition_ids ) { my $data = { 'id' => $id, - 'name' => $m->tradition( $id )->name, - 'has_stemma' => defined $m->stemma( $id ), + 'name' => $m->name( $id ), }; push( @all_texts, $data ); } @@ -46,6 +46,17 @@ sub index :Path :Args(0) { $c->stash->{template} = 'frontpage.tt'; } +=head2 tradition (TODO) + +The main page for a tradition, with information about it and links to the +available tools. + +=head2 relationships + +The relationship editor tool. + +=cut + sub relationships :Local { my( $self, $c ) = @_; my $m = $c->model('Directory'); @@ -54,22 +65,37 @@ sub relationships :Local { $c->stash->{template} = 'relationships.tt'; } +=head2 stexaminer + +The stemma analysis tool with the pretty colored table. + +=cut + sub stexaminer :Local { my( $self, $c ) = @_; my $m = $c->model('Directory'); - my $id = $c->request->params->{'textid'}; - my $tradition = $m->tradition( $id ); - my $stemma = $m->stemma( $id ); - my $t = run_analysis( $tradition, $stemma ); + my $tradition = $m->tradition( $c->request->params->{'textid'} ); + my $stemma = $tradition->stemma; + # TODO Think about caching the stemma in a session $c->stash->{svg} = $stemma->as_svg; - $c->stash->{variants} = $t->{'variants'}; $c->stash->{text_title} = $tradition->name; + $c->stash->{template} = 'index.tt'; + # TODO Run the analysis as AJAX from the loaded page. + my $t = run_analysis( $tradition ); + $c->stash->{variants} = $t->{'variants'}; $c->stash->{total} = $t->{'variant_count'}; $c->stash->{genealogical} = $t->{'genealogical_count'}; $c->stash->{conflict} = $t->{'conflict_count'}; - $c->stash->{template} = 'index.tt'; } +=head1 OPENSOCIAL URLs + +=head2 view_table + +Simple gadget to return the analysis table for the stexaminer + +=cut + sub view_table :Local { my( $self, $c ) = @_; my $m = $c->model('Directory'); @@ -79,10 +105,16 @@ sub view_table :Local { $c->stash->{template} = 'table_gadget.tt'; } +=head2 view_svg + +Simple gadget to return the SVG for a given stemma + +=cut + sub view_svg :Local { my( $self, $c ) = @_; my $m = $c->model('Directory'); - my $stemma = $m->stemma( $c->request->params->{'textid'} ); + my $stemma = $m->tradition( $c->request->params->{'textid'} )->stemma; if( $stemma ) { $c->stash->{svg} = $stemma->as_svg; } diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm index d0d0385..b8265e0 100644 --- a/lib/Text/Tradition/Collation/Reading.pm +++ b/lib/Text/Tradition/Collation/Reading.pm @@ -37,6 +37,8 @@ Options include: =item is_lacuna - The 'reading' represents a known gap in the text. +=item is_ph - A temporary placeholder for apparatus parsing purposes. Do not use unless you know what you are doing. + =item rank - The sequence number of the reading. This should probably not be set manually. =back @@ -98,6 +100,12 @@ has 'is_lacuna' => ( isa => 'Bool', default => undef, ); + +has 'is_ph' => ( + is => 'ro', + isa => 'Bool', + default => undef, + ); has 'rank' => ( is => 'rw', @@ -128,6 +136,8 @@ around BUILDARGS => sub { } elsif( exists $args->{'is_end'} ) { $args->{'id'} = '#END#'; # Change the ID to ensure we have only one $args->{'text'} = '#END#'; + } elsif( exists $args->{'is_ph'} ) { + $args->{'text'} = $args->{'id'}; } $class->$orig( $args ); @@ -143,7 +153,7 @@ of text found in a witness. sub is_meta { my $self = shift; - return $self->is_start || $self->is_end || $self->is_lacuna; + return $self->is_start || $self->is_end || $self->is_lacuna || $self->is_ph; } # Some syntactic sugar diff --git a/lib/Text/Tradition/Parser/CTE.pm b/lib/Text/Tradition/Parser/CTE.pm index b21da1b..c4877cc 100644 --- a/lib/Text/Tradition/Parser/CTE.pm +++ b/lib/Text/Tradition/Parser/CTE.pm @@ -32,6 +32,9 @@ initializes the Tradition from the file. my %sigil_for; # Save the XML IDs for witnesses. my %apps; # Save the apparatus XML for a given ID. my %has_ac; # Keep track of witnesses that have corrections. +my %group_sigla = ( ## HACK HACK HACK + '#M38' => [ qw( #M23 #M24 #M25 #M27 #M30 #M26 #M31 #M32 #M33 ) ], # l -> L* + ); sub parse { my( $tradition, $opts ) = @_; @@ -60,8 +63,10 @@ sub parse { my $id= $wit_el->getAttribute( 'xml:id' ); my @sig_parts = $xpc->findnodes( './abbr/descendant::text()', $wit_el ); my $sig = _stringify_sigil( @sig_parts ); - $tradition->add_witness( sigil => $sig, source => $wit_el->toString() ); - $sigil_for{'#'.$id} = $sig; # Make life easy by keying on the ID ref syntax + unless( exists $group_sigla{'#'.$id} ) { ## More HACKY + $tradition->add_witness( sigil => $sig, source => $wit_el->toString() ); + $sigil_for{'#'.$id} = $sig; # Make life easy by keying on the ID ref syntax + } } # Now go through the text and find the base tokens, apparatus tags, and @@ -85,15 +90,14 @@ sub parse { foreach my $item ( @base_text ) { my $r; if( $item->{'type'} eq 'token' ) { - $r = $c->add_reading( 'n'.$counter++ ); - $r->text( $item->{'content'} ); + $r = $c->add_reading( { id => 'n'.$counter++, + text => $item->{'content'} } ); } elsif ( $item->{'type'} eq 'anchor' ) { - $r = $c->add_reading( '#ANCHOR_' . $item->{'content'} . '#' ); - $r->is_meta(1); + $r = $c->add_reading( { id => '#ANCHOR_' . $item->{'content'} . '#', + is_ph => 1 } ); } elsif ( $item->{'type'} eq 'app' ) { my $tag = '#APP_' . $counter++ . '#'; - $r = $c->add_reading( $tag ); - $r->is_meta(1); + $r = $c->add_reading( { id => $tag, is_ph => 1 } ); $apps{$tag} = $item->{'content'}; } $c->add_path( $last, $r, $c->baselabel ); @@ -169,29 +173,33 @@ sub _add_readings { my $ctr = 0; my $tag = $app_id; $tag =~ s/^\#APP_(.*)\#$/$1/; - $DB::single = 1 if $tag < 2; foreach my $rdg ( $xn->getChildrenByTagName( 'rdg' ) ) { my @text; foreach ( $rdg->childNodes ) { push( @text, _get_base( $_ ) ); } - my $interpreted = @text - ? interpret( join( ' ', map { $_->{'content'} } @text ), $lemma_str ) - : ''; + my( $interpreted, $flag ) = ( '', undef ); + if( @text ) { + ( $interpreted, $flag ) = interpret( + join( ' ', map { $_->{'content'} } @text ), $lemma_str ); + } my @rdg_nodes; foreach my $w ( split( /\s+/, $interpreted ) ) { - my $r = $c->add_reading( $tag . "/" . $ctr++ ); - $r->text( $w ); + my $r = $c->add_reading( { id => $tag . "/" . $ctr++, + text => $w } ); push( @rdg_nodes, $r ); } # For each listed wit, save the reading. foreach my $wit ( split( /\s+/, $rdg->getAttribute( 'wit' ) ) ) { + $wit .= $flag if $flag; $wit_rdgs{$wit} = \@rdg_nodes; } + # Does the reading have an ID? If so it probably has a witDetail # attached, and we need to read it. if( $rdg->hasAttribute( 'xml:id' ) ) { + warn "Witdetail on meta reading" if $flag; # this could get complicated. my $rid = $rdg->getAttribute( 'xml:id' ); my $xpc = XML::LibXML::XPathContext->new( $xn ); my @details = $xpc->findnodes( './witDetail[@target="'.$rid.'"]' ); @@ -202,7 +210,24 @@ sub _add_readings { } # Now collate the variant readings, since it is not done for us. - collate_variants( $c, \@lemma, values %wit_rdgs ); + collate_variants( $c, \@lemma, values %wit_rdgs ); + + # HACKY HACKY Expand "group" sigla. + # Does not work for nested groups; also does not work with a modifier + # on the group sigil. + foreach my $wit_id ( keys %group_sigla ) { + if ( exists $wit_rdgs{$wit_id} ) { + my $rdg = $wit_rdgs{$wit_id}; + foreach my $w ( @{$group_sigla{$wit_id}} ) { + if( exists $wit_rdgs{$w} ) { + $DB::single = 1; + warn "Had reading for individual member $w of group $wit_id at $xn"; + } + $wit_rdgs{$w} = $rdg; + } + delete $wit_rdgs{$wit_id}; + } + } # Now add the witness paths for each reading. foreach my $wit_id ( keys %wit_rdgs ) { @@ -220,10 +245,9 @@ sub _anchor_name { sub _return_lemma { my( $c, $app, $anchor ) = @_; - my $app_node = $c->graph->node( $app ); - my $anchor_node = $c->graph->node( $anchor ); - my @nodes = grep { $_->name !~ /^\#A(PP|NCHOR)/ } - $c->reading_sequence( $app_node, $anchor_node, $c->baselabel ); + my @nodes = grep { $_->id !~ /^\#A(PP|NCHOR)/ } + $c->reading_sequence( $c->reading( $app ), $c->reading( $anchor ), + $c->baselabel ); return @nodes; } @@ -234,26 +258,34 @@ sub interpret { my $oldreading = $reading; # $lemma =~ s/\s+[[:punct:]]+$//; # $reading =~ s/\s*\(?sic([\s\w.]+)?\)?$//; + my $flag; # In case of p.c. indications my @words = split( /\s+/, $lemma ); if( $reading =~ /^(.*) praem.$/ ) { $reading = "$1 $lemma"; } elsif( $reading =~ /^(.*) add.$/ ) { $reading = "$lemma $1"; + } elsif( $reading =~ /add. alia manu/ ) { + # Ignore it. + $reading = $lemma; } elsif( $reading eq 'om.' || $reading =~ /locus [uv]acuus/ || $reading =~ /inscriptionem compegi e/ # TODO huh? || $reading eq 'def.' # TODO huh? ) { $reading = ''; - } elsif( $reading eq 'inv.' ) { + } elsif( $reading =~ /^in[uv]\.$/ ) { # Hope it is two words. print STDERR "WARNING: want to invert a lemma that is not two words\n" unless scalar( @words ) == 2; $reading = join( ' ', reverse( @words ) ); - } elsif( $reading eq 'iter.' ) { + } elsif( $reading =~ /^iter(\.|at)$/ ) { # Repeat the lemma $reading = "$lemma $lemma"; - } elsif( $reading =~ /^(.*) \.\.\. (.*)$/ ) { + } elsif( $reading eq 'in marg.' ) { + # There was nothing before a correction. + $reading = ''; + $flag = '_ac'; + } elsif( $reading =~ /^(.*) \.\.\. (.*)$/ ) { # The first and last N words captured should replace the first and # last N words of the lemma. my @begin = split( /\s+/, $1 ); @@ -267,8 +299,12 @@ sub interpret { $reading = join( ' ', @words ); } } - print STDERR "Interpreted $oldreading as $reading given $lemma\n"; - return $reading; + if( $oldreading ne $reading || $flag || $oldreading =~ /\./ ) { + my $int = $reading; + $int .= " ($flag)" if $flag; + print STDERR "Interpreted $oldreading as $int given $lemma\n"; + } + return( $reading, $flag ); } sub _parse_wit_detail { @@ -305,22 +341,22 @@ sub expand_all_paths { # Walk the collation and fish out the paths for each witness foreach my $wit ( $c->tradition->witnesses ) { my $sig = $wit->sigil; - my @path = grep { $_->name !~ /(APP|ANCHOR)/ } + my @path = grep { !$_->is_ph } $c->reading_sequence( $c->start, $c->end, $sig ); $wit->path( \@path ); if( $has_ac{$sig} ) { - my @ac_path = grep { $_->name !~ /(APP|ANCHOR)/ } + my @ac_path = grep { !$_->is_ph } $c->reading_sequence( $c->start, $c->end, $sig.$c->ac_label, $sig ); $wit->uncorrected_path( \@ac_path ); } } # Delete the anchors - foreach my $anchor ( grep { $_->name =~ /(APP|ANCHOR)/ } $c->readings ) { + foreach my $anchor ( grep { $_->is_ph } $c->readings ) { $c->del_reading( $anchor ); } - # Delete all edges - map { $c->del_path( $_ ) } $c->paths; + # Delete the base edges + map { $c->del_path( $_, $c->baselabel ) } $c->paths; # Make the path edges $c->make_witness_paths();