change Matthew hack, add language flag
[scpubgit/stemmatology.git] / script / strip_punctuation.pl
CommitLineData
6f0ec5df 1#!/usr/bin/env perl
2
3use lib 'lib';
4use strict;
5use warnings;
6use Text::Tradition::Directory;
7
# Progress messages interpolate tradition names and reading texts, so give
# STDERR a UTF-8 layer to avoid "Wide character" warnings.
binmode STDERR, ':utf8';

# Command line: a DSN, optionally followed by a database user and password.
my( $dsn, $user, $pass ) = @ARGV;
die "Usage: $0 <dsn> [<user> <password>]\n"
    unless defined($dsn) && length($dsn);

my $connect_args = { dsn => $dsn };
# Test for presence rather than truthiness, so that a password such as "0"
# or the empty string is still handed to the directory.
$connect_args->{'extra_args'} = { user => $user, password => $pass }
    if defined($user) && length($user) && defined($pass);
my $dir = Text::Tradition::Directory->new( $connect_args );

# Visit every tradition in the directory, strip the punctuation from each of
# its readings, and save the tradition back.
foreach my $id ( $dir->tradition_ids ) {
    # Scope object held for the length of this iteration.
    # NOTE(review): presumably required so the looked-up objects stay live
    # until the save below -- confirm against the KiokuDB documentation.
    my $scope = $dir->new_scope;
    my $tradition = $dir->lookup( $id );
    print STDERR "Processing tradition " . $tradition->name . "\n";
    foreach my $reading ( $tradition->collation->readings ) {
        # Readings flagged by is_meta are left untouched.
        next if $reading->is_meta;
        $reading->alter_text( strip_punct( $reading->text ) );
    }
    # NOTE(review): presumably reconciles readings whose text became
    # identical after stripping -- confirm what flatten_ranks does.
    $tradition->collation->flatten_ranks;
    $dir->save( $tradition );
}

print STDERR "Done\n";
30
# Trim non-word characters from both ends of a reading's text.
#
# Takes the text as its only argument and returns the trimmed text. Text
# that contains no word character at all is returned unchanged. Whenever the
# text is actually altered, a before/after note is printed to STDERR.
sub strip_punct {
    my $rtext  = shift;
    my $orig_r = $rtext;

    # No word character anywhere: hand the text back exactly as received.
    if ( $rtext !~ /\w/ ) {
        return $rtext;
    }

    # Topicalise the working copy so both substitutions edit it in place.
    for ($rtext) {
        s/^\W+//;    # leading run of non-word characters
        s/\W+$//;    # trailing run of non-word characters
    }

    if ( $orig_r ne $rtext ) {
        print STDERR "Altering $orig_r to $rtext\n";
    }

    return $rtext;
}
41