X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FParser%2FBaseText.pm;h=76e4f3617d0e1bcec088ad8a23b6d108e3444177;hb=6e526d95ab3ff2878cedc28808984d3ec6a16c88;hp=a5ab34a8d05814637d8ce4bb28f693a7968991e5;hpb=49d4f2accb29737fdbaa42f75062d2055c4bc2ef;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Parser/BaseText.pm b/lib/Text/Tradition/Parser/BaseText.pm index a5ab34a..76e4f36 100644 --- a/lib/Text/Tradition/Parser/BaseText.pm +++ b/lib/Text/Tradition/Parser/BaseText.pm @@ -3,7 +3,9 @@ package Text::Tradition::Parser::BaseText; use strict; use warnings; use Module::Load; -use Text::Tradition::Parser::Util qw( collate_variants cmp_str check_for_repeated add_hash_entry ); +use TryCatch; +use Text::Tradition::Parser::Util qw( collate_variants cmp_str + check_for_repeated add_hash_entry ); =head1 NAME @@ -47,7 +49,7 @@ sub parse { load( $format_mod ); # TODO Handle a string someday if we ever have a format other than KUL my @apparatus_entries = $format_mod->can('read')->( $opts ); - merge_base( $tradition->collation, $opts->{'base'}, @apparatus_entries ); + merge_base( $tradition->collation, $opts, @apparatus_entries ); } =item B @@ -85,8 +87,8 @@ my $edits_required = {}; # edits_required -> wit -> [ { start_idx, end_idx, items } ] sub merge_base { - my( $collation, $base_file, @app_entries ) = @_; - my @base_line_starts = read_base( $base_file, $collation ); + my( $collation, $opts, @app_entries ) = @_; + my @base_line_starts = read_base( $opts->{'base'}, $collation ); my %all_witnesses; foreach my $app ( @app_entries ) { @@ -168,7 +170,7 @@ sub merge_base { my @lemma_set = $collation->reading_sequence( $lemma_start, $lemma_end ); my @reading_sets = [ @lemma_set ]; - + # For each reading that is not rdg_0, we create the variant # reading nodes, and store the range as an edit operation on # the base text. @@ -247,7 +249,8 @@ sub merge_base { # Now make the witness objects, and create their text sequences foreach my $w ( grep { $_ !~ /_post$/ } keys %$edits_required ) { print STDERR "Creating witness $w\n"; - my $witness_obj = $collation->tradition->add_witness( sigil => $w ); + my $witness_obj = $collation->tradition->add_witness( + sigil => $w, sourcetype => 'collation' ); my $debug; # = $w eq 'Vb11'; my @ante_corr_seq = apply_edits( $collation, $edits_required->{$w}, $debug ); my @post_corr_seq = apply_edits( $collation, $edits_required->{$w."_post"}, $debug ) @@ -277,9 +280,11 @@ sub merge_base { } ### HACKY HACKY Do some one-off path corrections here. - require( 'data/boodts/s158.HACK' ); - KUL::HACK::pre_path_hack( $collation ); - + if( $opts->{'input'} eq 'KUL' ) { + require 'data/boodts/s158.HACK'; + KUL::HACK::pre_path_hack( $collation ); + } + # Now walk paths and calculate positional rank. $collation->make_witness_paths(); # Now delete any orphaned readings. @@ -289,7 +294,7 @@ sub merge_base { $collation->del_reading( $r ); } - KUL::HACK::post_path_hack( $collation ); + KUL::HACK::post_path_hack( $collation ) if $opts->{'input'} eq 'KUL'; # Have to check relationship validity at this point, because before that # we had no paths. # foreach my $rel ( $collation->relationships ) { @@ -299,7 +304,7 @@ sub merge_base { # $rel->type, $rel->from->id, $rel->to->id ); # } # } - $collation->calculate_ranks(); + $collation->calculate_common_readings(); # will implicitly rank } =item B @@ -390,10 +395,20 @@ sub set_relationships { $r->id ne $labels{$r->text}->id ) { if( $type eq 'repetition' ) { # Repetition - $collation->add_relationship( $r, $labels{$r->text}, \%rel_options ); + try { + $collation->add_relationship( $r, $labels{$r->text}, \%rel_options ); + } catch( Text::Tradition::Error $e ) { + warn "Could not set repetition relationship $r -> " + . $labels{$r->text} . ": " . $e->message; + } } else { # Transposition - $r->set_identical( $labels{$r->text} ); + try { + $r->set_identical( $labels{$r->text} ); + } catch( Text::Tradition::Error $e ) { + warn "Could not set transposition relationship $r -> " + . $labels{$r->text} . ": " . $e->message; + } } } } @@ -411,8 +426,13 @@ sub set_relationships { $rel_options{'equal_rank'} = 1; if( @$lemma == @$var ) { foreach my $i ( 0 .. $#{$lemma} ) { - $collation->add_relationship( $var->[$i], $lemma->[$i], - \%rel_options ); + try { + $collation->add_relationship( $var->[$i], $lemma->[$i], + \%rel_options ); + } catch( Text::Tradition::Error $e ) { + warn "Could not set $type relationship " . $var->[$i] . " -> " + . $lemma->[$i] . ": " . $e->message; + } } } else { # An uneven many-to-many mapping. Skip for now. @@ -420,9 +440,9 @@ sub set_relationships { # my $lemseg = @$lemma > 1 ? $collation->add_segment( @$lemma ) : $lemma->[0]; # my $varseg = @$var > 1 ? $collation->add_segment( @$var ) : $var->[0]; # $collation->add_relationship( $varseg, $lemseg, \%rel_options ); - if( @$lemma == 1 && @$var == 1 ) { - $collation->add_relationship( $lemma->[0], $var->[0], \%rel_options ); - } + # if( @$lemma == 1 && @$var == 1 ) { + # $collation->add_relationship( $lemma->[0], $var->[0], \%rel_options ); + # } } } elsif( $type !~ /^(add|om|lex)$/i ) { warn "Unrecognized type $type";