remove some debugging statements
[scpubgit/stemmatology.git] / script / make_tradition.pl
CommitLineData
910a0a6d 1#!/usr/bin/env perl
2
3use lib 'lib';
4use strict;
5use warnings;
6use Getopt::Long;
56c56f0d 7use TryCatch;
910a0a6d 8use Text::Tradition;
861c3e27 9use Text::Tradition::Directory;
56c56f0d 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars /;
910a0a6d 11
# Write wide characters as UTF-8 on both standard streams.
binmode $_, ":utf8" for \*STDERR, \*STDOUT;
# Try the same for the Perl debugger's output handle. Any failure or warning
# is swallowed by the eval / 'no warnings' (presumably because the handle
# only exists when running under the debugger).
eval { no warnings; binmode $DB::OUT, ":utf8"; };
15
# Options that have a default value.
my( $informat, $outformat, $language, $name, $sep, $dsn ) = ( '', '', 'Default',
    'Tradition', "\t", "dbi:SQLite:dbname=stemmaweb/db/traditions.db" );
# Options that stay undefined unless given on the command line.
my( $inbase, $help, $stemmafile, $dbuser, $dbpass, $from, $to, $dbid, $debug, $nonlinear );

# GetOptions returns false when it meets an unknown option or a missing /
# malformed option value. Stop with the usage message in that case instead
# of silently carrying on with the defaults.
GetOptions(
    'i|in=s'       => \$informat,
    'b|base=s'     => \$inbase,
    'o|out=s'      => \$outformat,
    'l|language=s' => \$language,
    'n|name=s'     => \$name,
    'h|help'       => \$help,
    's|stemma=s'   => \$stemmafile,
    'u|user=s'     => \$dbuser,
    'p|pass=s'     => \$dbpass,
    'f|from=s'     => \$from,
    't|to=s'       => \$to,
    'nl|nonlinear' => \$nonlinear,
    'sep=s'        => \$sep,
    'dsn=s'        => \$dsn,
    'dbid=s'       => \$dbid,
    'debug'        => \$debug,
) or help( "Could not parse the command-line options" );

# help() prints the usage and exits, so nothing below runs for -h/--help.
if( $help ) {
    help();
}
43
# Validate the input format. The whole alternation sits inside one group so
# that both anchors apply to every alternative; without the group only the
# first alternatives are anchored at the start, 'stone' matches anywhere and
# 'db' is anchored only at the end. 'cx' is accepted here because it is
# normalised to CollateX just below.
unless( $informat =~ /^(?:CSV|CTE|KUL|Self|TEI|CollateX|cx|tab(?:ular)?|stone|db)$/i ) {
    help( "Input format must be one of CollateX, CSV, CTE, KUL, Self, TEI, tab(ular), stone, db" );
}
# Canonicalise the spelling: later code compares $informat with 'eq' against
# exactly these strings and passes it on as the 'input' argument.
$informat = 'CollateX' if $informat =~ /^c(?:ollate)?x$/i;
$informat = 'KUL' if $informat =~ /^kul$/i;
$informat = 'CTE' if $informat =~ /^cte$/i;
$informat = 'Self' if $informat =~ /^self$/i;
$informat = 'TEI' if $informat =~ /^tei$/i;
# Both accepted spellings must end up as 'Tabular', otherwise the separator
# character is never passed on for "tabular".
$informat = 'Tabular' if $informat =~ /^tab(?:ular)?$/i;
$informat = 'CollateText' if $informat =~ /^stone$/i;
# The database branch is selected with "eq 'db'", so fold the case here too.
$informat = 'db' if $informat =~ /^db$/i;
# NOTE(review): CSV is the only format left in whatever case the user typed;
# confirm whether it should be canonicalised as well.

unless( $outformat =~ /^(graphml|svg|dot|stemma|csv|db)$/ ) {
    help( "Output format must be one of db, graphml, svg, csv, stemma, or dot" );
}

# --from / --to select a subgraph, which only the GraphML output supports.
if( $from || $to ) {
    help( "Subgraphs only supported in GraphML format" )
        unless $outformat eq 'graphml';
}

# Do we have a base if we need it?
if( $informat =~ /^(KUL|CollateText)$/ && !$inbase ) {
    help( "$informat input needs a base text" );
}
# Allow the literal word 'tab' as a shell-friendly way to ask for a tab.
$sep = "\t" if $sep eq 'tab';
fa954f4c 69
# The single positional argument is the input file name or, for 'db' input,
# the identifier to look up in the directory.
my $input = $ARGV[0];
help( "No input file or tradition ID given" )
    unless defined $input && length $input;

my $tradition;
my $dir;
if( $informat eq 'db' ) {
    # Fetch an existing tradition from the database named by --dsn.
    my $dbargs = { dsn => $dsn };
    $dbargs->{'extra_args'}->{'user'} = $dbuser if $dbuser;
    $dbargs->{'extra_args'}->{'password'} = $dbpass if $dbpass;
    $dir = Text::Tradition::Directory->new( $dbargs );
    my $scope = $dir->new_scope();
    $tradition = $dir->lookup( $input );
    # Fail here with a clear message rather than later with "Can't call
    # method ... on an undefined value".
    die "No tradition with ID $input found in $dsn\n" unless $tradition;
} else {
    # Build the constructor arguments from the options that were given and
    # parse the input file into a new tradition.
    my %args = ( 'input' => $informat,
                 'file' => $input );
    $args{'linear'} = 0 if $nonlinear;
    $args{'base'} = $inbase if $inbase;
    $args{'language'} = $language if $language;
    $args{'name'} = $name if $name;
    $args{'sep_char'} = $sep if $informat eq 'Tabular';
    ### Custom hacking for Stone
    if( $informat eq 'CollateText' ) {
        $args{'sigla'} = [ qw/ S M X V Z Bb B K W L / ];
    }
    $tradition = Text::Tradition->new( %args );
}
# Attach a stemma from the given dot file, if one was requested.
if( $stemmafile ) {
    my $stemma = $tradition->add_stemma( dotfile => $stemmafile );
    print STDERR "Saved stemma at $stemmafile\n" if $stemma;
}
910a0a6d 100
# Finally, emit the tradition in the requested output format.
if( $outformat eq 'db' ) {
    # Reuse the directory opened for 'db' input if there is one; otherwise
    # open it now, passing 'create' => 1 plus any credentials as extra_args.
    if( !$dir ) {
        my %extra = ( 'create' => 1 );
        $extra{'user'} = $dbuser if $dbuser;
        $extra{'password'} = $dbpass if $dbpass;
        $dir = Text::Tradition::Directory->new( 'dsn' => $dsn,
                                                'extra_args' => \%extra );
    }
    # Hold the scope object until the store call below has completed.
    my $store_scope = $dir->new_scope;
    # Store under the caller-chosen ID when --dbid was given.
    my $uuid = $dbid
        ? $dir->store( $dbid => $tradition )
        : $dir->store( $tradition );
    print STDERR "Saved tradition to database with ID $uuid\n";
} elsif( $outformat eq 'stemma' ) {
    # Turn the alignment table into character data and print whatever
    # phylip_pars returns for it; a Text::Tradition::Error is reported on
    # STDERR instead of aborting the script.
    my $char_matrix = character_input( $tradition->collation->alignment_table );
    try {
        print phylip_pars( $char_matrix );
    } catch( Text::Tradition::Error $e ) {
        print STDERR "Bad result: " . $e->message;
    }
} else {
    # Every remaining format maps onto a collation method named as_<format>.
    my $method = "as_$outformat";
    my %render_opts;
    $render_opts{'from'} = $from if $from;
    $render_opts{'to'} = $to if $to;
    $render_opts{'nocalc'} = 1 if $debug;
    print $tradition->collation->$method( \%render_opts );
}
133
# Print the usage summary to STDERR, followed by $msg when one is given, and
# exit. The exit status is 1 when called with an error message and 0 when
# called without one (plain -h/--help). This sub never returns.
sub help {
    my( $msg ) = @_;
    # The option list mirrors the GetOptions table at the top of the script.
    print STDERR <<"EOF";
Usage: $0 -i [format] -o [format] [options] [inputfile]
    -i, --in:         Format of the input. Must be one of CollateX, CSV, CTE,
                      KUL, Self, TEI, tab(ular), stone, db. For db input the
                      positional argument is the ID to look up.
    -o, --out:        Format of the output. Must be one of db, graphml, svg,
                      csv, stemma, dot.
    -b, --base:       Filename that contains a base text. Needed for KUL and
                      stone input.
    -l, --language:   Language of the tradition (default: Default).
    -n, --name:       Name of the tradition (default: Tradition).
    -s, --stemma:     Dot file with a stemma to add to the tradition.
    -f, --from:       Start of the subgraph to output (graphml output only).
    -t, --to:         End of the subgraph to output (graphml output only).
    --nl, --nonlinear: Treat transposed readings as the same node, producing
                      a non-linear graph.
    --sep:            Field separator for tabular input; the word 'tab' means
                      a tab character (default: tab).
    --dsn:            DSN of the tradition database
                      (default: dbi:SQLite:dbname=stemmaweb/db/traditions.db).
    -u, --user:       Database user name.
    -p, --pass:       Database password.
    --dbid:           ID under which to store the tradition (db output).
    --debug:          Pass the 'nocalc' flag to the output routine.
    -h, --help:       Print this message.
EOF
    if( $msg ) {
        print STDERR "$msg\n";
    }
    exit( $msg ? 1 : 0 );
}