revert untested changes that were causing a segfault
[scpubgit/stemmatology.git] / script / make_tradition.pl
CommitLineData
910a0a6d 1#!/usr/bin/env perl
2
3use lib 'lib';
4use strict;
5use warnings;
6use Getopt::Long;
56c56f0d 7use TryCatch;
910a0a6d 8use Text::Tradition;
861c3e27 9use Text::Tradition::Directory;
56c56f0d 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars /;
910a0a6d 11
# Emit UTF-8 on both standard streams (STDERR first, then STDOUT).
for my $stream ( \*STDERR, \*STDOUT ) {
    binmode $stream, ":utf8";
}
# Do the same for the debugger's output handle; the eval and the disabled
# warnings keep this silent when that handle is not available.
eval {
    no warnings;
    binmode $DB::OUT, ":utf8";
};
15
# Command-line settings, each initialised to its default value.
my $informat   = '';            # -i / --in:       input format
my $inbase     = '';            # -b / --base:     base text file
my $outformat  = '';            # -o / --out:      output format
my $help       = '';            # -h / --help:     show usage and exit
my $language   = 'Default';     # -l / --language: language of the text
my $name       = 'Tradition';   # -n / --name:     name of the tradition
my $sep        = "\t";          # --sep:           separator for tabular input
my $stemmafile = '';            # -s / --stemma:   dot file holding a stemma
my $dsn        = "dbi:SQLite:dbname=stemmaweb/db/traditions.db";   # --dsn
my $dbuser;                     # -u / --user:     database user (optional)
my $dbpass;                     # -p / --pass:     database password (optional)

# GetOptions warns about unknown or malformed options itself and then returns
# false; stop with the usage message rather than carrying on with a command
# line that was only partly understood.
GetOptions(
    'i|in=s'       => \$informat,
    'b|base=s'     => \$inbase,
    'o|out=s'      => \$outformat,
    'l|language=s' => \$language,
    'n|name=s'     => \$name,
    'h|help'       => \$help,
    's|stemma=s'   => \$stemmafile,
    'u|user=s'     => \$dbuser,
    'p|pass=s'     => \$dbpass,
    'sep=s'        => \$sep,
    'dsn=s'        => \$dsn,
) or help( "Could not parse the command line options" );
33
# Show the usage message and exit if help was requested.
help() if $help;

# Every accepted spelling of the input format (matched case-insensitively and
# against the whole string) mapped to the canonical name passed on to
# Text::Tradition.  A lookup table avoids the alternation-precedence trap of
# an expression like /^(a|b)|c$/, which anchors only part of the pattern.
my %canonical_informat = (
    'csv'      => 'CSV',
    'cte'      => 'CTE',
    'kul'      => 'KUL',
    'self'     => 'Self',
    'tei'      => 'TEI',
    'collatex' => 'CollateX',
    'cx'       => 'CollateX',
    'tab'      => 'Tabular',
    'tabular'  => 'Tabular',
    'stone'    => 'CollateText',
);
my $known_informat = $canonical_informat{ lc $informat };
unless( $known_informat ) {
    help( "Input format must be one of CollateX, CSV, CTE, KUL, Self, TEI, Tabular" );
}
$informat = $known_informat;

# The output format is matched case-sensitively; anything other than 'stemma'
# and 'db' is later turned into an as_<format> method call on the collation.
unless( $outformat =~ /^(graphml|svg|dot|stemma|csv|db)$/ ) {
    help( "Output format must be one of db, graphml, svg, csv, stemma, or dot" );
}

# Do we have a base if we need it?
if( $informat =~ /^(KUL|CollateText)$/ && !$inbase ) {
    help( "$informat input needs a base text" );
}

# Allow the separator to be spelled out as the word 'tab'.
$sep = "\t" if $sep eq 'tab';
fa954f4c 58
# The file to parse is the first remaining command-line argument.  Without it
# there is nothing to read, so fail with the usage message instead of handing
# an undefined filename to the parser.
my $input = $ARGV[0];
help( "No input file specified" ) unless defined $input && length $input;

# Collect the constructor arguments for the tradition.  Optional settings are
# only passed on when they have a true value.
my %args = ( 'input' => $informat,
             'file'  => $input );
$args{'base'} = $inbase if $inbase;
$args{'language'} = $language if $language;
$args{'name'} = $name if $name;
# The separator character is only passed for tabular input.
$args{'sep_char'} = $sep if $informat eq 'Tabular';
### Custom hacking for Stone
if( $informat eq 'CollateText' ) {
    $args{'sigla'} = [ qw/ S M X V Z Bb B K W L / ];
}
my $tradition = Text::Tradition->new( %args );

# Attach a stemma from the given dot file, if one was supplied.
if( $stemmafile ) {
    my $stemma = $tradition->add_stemma( dotfile => $stemmafile );
    print STDERR "Saved stemma at $stemmafile\n" if $stemma;
}
910a0a6d 78
# Now output what we have been asked to.
if( $outformat eq 'stemma' ) {
    # Turn the alignment table into character data and print the result of
    # running it through phylip_pars; a Text::Tradition::Error is reported on
    # STDERR rather than propagated.
    my $cdata = character_input( $tradition->collation->alignment_table );
    try {
        print phylip_pars( $cdata );
    } catch( Text::Tradition::Error $e ) {
        print STDERR "Bad result: " . $e->message;
    }
} elsif( $outformat eq 'db' ) {
    # Store the tradition in the directory identified by the DSN.
    my $extra_args = { 'create' => 1 };
    # Test for definedness, not truth, so that an explicitly supplied value
    # such as "0" or an empty string is still passed through.
    $extra_args->{'user'} = $dbuser if defined $dbuser;
    $extra_args->{'password'} = $dbpass if defined $dbpass;
    my $dir = Text::Tradition::Directory->new( 'dsn' => $dsn,
                                               'extra_args' => $extra_args );
    # Hold the scope object in a lexical for the duration of the store call.
    my $scope = $dir->new_scope;
    my $uuid = $dir->store( $tradition );
    print STDERR "Saved tradition to database with ID $uuid\n";
} else {
    # Every remaining format maps onto an as_<format> method of the collation
    # (the format name was validated against a fixed list beforehand).
    my $output = "as_$outformat";
    print $tradition->collation->$output();
}
100
# Print the usage summary to STDERR and exit.
#
# Takes one optional argument: an error message.  When it is given (and true)
# it is printed after the usage text and the script exits with status 1;
# otherwise the script exits with status 0.  This sub never returns.
sub help {
    my( $msg ) = @_;
    print STDERR <<"EOF";
Usage: $0 -i [format] -o [format] (--base [filename]) [options] [inputfile]
    i, in:       Format of the input file. Must be one of CollateX, CSV, CTE,
                 KUL, Self, TEI, Tabular.
    o, out:      Format of the output. Must be one of db, graphml, svg, csv,
                 stemma, dot.
    b, base:     Filename that contains a base text. Needed for KUL input.
    l, language: Language of the text (default: Default).
    n, name:     Name of the tradition (default: Tradition).
    s, stemma:   Dot file containing a stemma to add to the tradition.
    sep:         Separator character for Tabular input; 'tab' stands for a
                 tab character (default: tab).
    dsn:         Data source name used for db output
                 (default: dbi:SQLite:dbname=stemmaweb/db/traditions.db).
    u, user:     Database user for db output.
    p, pass:     Database password for db output.
    h, help:     Print this message.
EOF
    if( $msg ) {
        print STDERR "$msg\n";
    }
    # A message signals an error, so reflect that in the exit status.
    exit( $msg ? 1 : 0 );
}
117}