From: Tara L Andrews Date: Wed, 22 Aug 2012 19:56:54 +0000 (+0200) Subject: exclude punctuation X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ead7032e2e1975dfb29951cb2e3794b27ada14b9;p=scpubgit%2Fstemmatology.git exclude punctuation --- diff --git a/script/analyze.pl b/script/analyze.pl index 5477c64..21a124a 100755 --- a/script/analyze.pl +++ b/script/analyze.pl @@ -18,8 +18,7 @@ my $calcdsn = 'dbi:SQLite:dbname=db/graphs.db'; my $scope = $dir->new_scope(); my $lookfor = shift @ARGV || ''; -my %collapse; -map { $collapse{$_} = 1 } @ARGV; +my @collapse = @ARGV; my @relation_types = grep { !$collapse{$_} } qw/ orthographic spelling grammatical lexical transposition addition deletion @@ -60,9 +59,12 @@ foreach my $tinfo( $dir->traditionlist ) { $datahash{text_name} = $tradition->name; # Run the analysis for each row in @rows - my %opts = ( exclude_type1 => 1, calcdsn => $calcdsn ); - if( keys %collapse ) { - $opts{merge_types} = [ keys %collapse ]; + my %opts = ( + exclude_type1 => 1, + merge_types => [ 'punctuation' ], + calcdsn => $calcdsn ); + if( @collapse ) { + push( @{$opts{merge_types}}, @collapse ); } my $result = run_analysis( $tradition, %opts ); @@ -87,7 +89,6 @@ foreach my $tinfo( $dir->traditionlist ) { my @roots = @{$rdghash->{independent_occurrence}}; next if @roots == 1 && !$rdghash->{'followed'} && !$rdghash->{'not_followed'} && !$rdghash->{'follow_unknown'}; - # TODO Weed out punctuation my $rdg = $tradition->collation->reading( $rdghash->{readingid} ); my $type; if( $rdghash->{'is_conflict'} ) {