make French morphology use Lingua objects; add tests
[scpubgit/stemmatology.git] / script / strip_punctuation.pl
CommitLineData
#!/usr/bin/env perl

# Strip leading and trailing punctuation from the text of every non-meta
# reading of every tradition in a Text::Tradition directory, then re-flatten
# the collation ranks and save each tradition back to the directory.
#
# Usage: strip_punctuation.pl DSN [USER [PASSWORD]]
#
# Progress and every altered reading are reported on STDERR.

use lib 'lib';
use strict;
use warnings;
use Text::Tradition::Directory;

# Log lines include tradition names and reading text, which may be non-ASCII.
binmode STDERR, ':utf8';

my( $dsn, $user, $pass ) = @ARGV;

# Fail early with a readable message instead of handing an undefined DSN
# to the directory constructor.
die "Usage: $0 DSN [USER [PASSWORD]]\n"
    unless defined $dsn && length $dsn;

my $connect_args = { dsn => $dsn };
# Test for presence rather than truth: an empty or "0" password (or a user
# with no password at all) must not silently discard the user name.
$connect_args->{'extra_args'} = { user => $user, password => $pass }
    if defined $user && length $user;
my $dir = Text::Tradition::Directory->new( $connect_args );

foreach my $text ( $dir->traditionlist ) {
    my $id = $text->{'id'};
    # Never read again, but held in a lexical so it stays alive until the end
    # of this iteration. NOTE(review): presumably the directory's object
    # scope has to outlive the lookup/save pair below -- confirm against
    # Text::Tradition::Directory.
    my $scope = $dir->new_scope;
    my $tradition = $dir->lookup( $id );
    print STDERR "Processing tradition " . $tradition->name . "\n";
    foreach my $reading ( $tradition->collation->readings ) {
        # Meta readings are left untouched.
        next if $reading->is_meta;
        $reading->alter_text( strip_punct( $reading->text ) );
    }
    $tradition->collation->flatten_ranks;
    $dir->save( $tradition );
}

print STDERR "Done\n";
31
# Strip leading and trailing non-word characters from a reading's text.
#
# Takes the text as its only argument and returns the trimmed copy.
# Text that is undefined, or that contains no word character at all
# (i.e. consists purely of punctuation), is returned unchanged.
# Every actual alteration is reported on STDERR.
sub strip_punct {
    my( $rtext ) = @_;
    # Nothing to strip, and matching undef would only raise warnings.
    return $rtext unless defined $rtext;
    my $orig_r = $rtext;
    # The /u modifier (Perl 5.14+) forces Unicode rules for \w and \W.
    # Without it, a string that is not UTF-8-flagged internally is matched
    # with ASCII-only rules, so Latin-1-range letters such as e-acute would
    # be treated as punctuation and stripped from the edges of the reading.
    return $rtext unless $rtext =~ /\w/u;
    $rtext =~ s/\A\W+//u;
    # A trailing newline is itself \W, so \z strips exactly what $ would.
    $rtext =~ s/\W+\z//u;
    print STDERR "Altering $orig_r to $rtext\n"
        unless $orig_r eq $rtext;
    return $rtext;
}
42