pull tradition from DB if asked
[scpubgit/stemmatology.git] / script / make_tradition.pl
CommitLineData
910a0a6d 1#!/usr/bin/env perl
2
3use lib 'lib';
4use strict;
5use warnings;
6use Getopt::Long;
56c56f0d 7use TryCatch;
910a0a6d 8use Text::Tradition;
861c3e27 9use Text::Tradition::Directory;
56c56f0d 10use Text::Tradition::StemmaUtil qw/ character_input phylip_pars /;
910a0a6d 11
# Text may contain non-ASCII characters, so put every handle we print to
# into UTF-8 mode.
binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";
# Do the same for the $DB::OUT handle; any failure here is trapped and
# deliberately ignored.
eval { no warnings; binmode $DB::OUT, ":utf8"; };

# Variables with defaults
my( $informat, $outformat, $language, $name, $sep, $dsn ) = ( '', '', 'Default',
    'Tradition', "\t", "dbi:SQLite:dbname=stemmaweb/db/traditions.db" );
# Variables with no default
my( $inbase, $help, $stemmafile, $dbuser, $dbpass, $from, $to, $dbid, $debug );

# GetOptions returns false when it meets an unknown or malformed option.
# Stop with the usage message in that case rather than carrying on with
# a half-parsed command line.
GetOptions(
    'i|in=s'       => \$informat,
    'b|base=s'     => \$inbase,
    'o|out=s'      => \$outformat,
    'l|language=s' => \$language,
    'n|name=s'     => \$name,
    'h|help'       => \$help,
    's|stemma=s'   => \$stemmafile,
    'u|user=s'     => \$dbuser,
    'p|pass=s'     => \$dbpass,
    'f|from=s'     => \$from,
    't|to=s'       => \$to,
    'sep=s'        => \$sep,
    'dsn=s'        => \$dsn,
    'dbid=s'       => \$dbid,
    'debug'        => \$debug,
) or help( "Could not parse the command-line options" );
38
if( $help ) {
    help();
}

# Validate the input format. The whole alternation sits inside one anchored
# group so that the format name must match in full; without the group the
# anchors would apply only to the first and last alternatives.
unless( $informat =~ /^(?:CSV|CTE|KUL|Self|TEI|c(?:ollate)?x|tab(?:ular)?|stone|db)$/i ) {
    help( "Input format must be one of CollateX, CSV, CTE, KUL, Self, TEI, tabular, stone, db" );
}
# Normalise the accepted spellings to the exact names used further on.
$informat = 'CollateX' if $informat =~ /^c(ollate)?x$/i;
$informat = 'KUL' if $informat =~ /^kul$/i;
$informat = 'CTE' if $informat =~ /^cte$/i;
$informat = 'Self' if $informat =~ /^self$/i;
$informat = 'TEI' if $informat =~ /^tei$/i;
# Both 'tab' and 'tabular' are accepted above, so both must map to 'Tabular'.
$informat = 'Tabular' if $informat =~ /^tab(ular)?$/i;
$informat = 'CollateText' if $informat =~ /^stone$/i;
# The database branch later compares against lower-case 'db' exactly.
$informat = 'db' if $informat =~ /^db$/i;

unless( $outformat =~ /^(graphml|svg|dot|stemma|csv|db)$/ ) {
    help( "Output format must be one of db, graphml, svg, csv, stemma, or dot" );
}

if( $from || $to ) {
    help( "Subgraphs only supported in GraphML format" )
        unless $outformat eq 'graphml';
}

# Do we have a base if we need it?
if( $informat =~ /^(KUL|CollateText)$/ && !$inbase ) {
    help( "$informat input needs a base text" );
}
# Allow the literal word 'tab' as a shell-friendly way to ask for a tab
# separator.
$sep = "\t" if $sep eq 'tab';
fa954f4c 68
# The single positional argument is the input file or, for 'db' input,
# the ID of the tradition to look up.
my $input = $ARGV[0];
help( "No input file or tradition ID given" ) unless defined $input;

my $tradition;
my $dir;
if( $informat eq 'db' ) {
    # Pull an existing tradition out of the database.
    my $dbargs = { dsn => $dsn };
    $dbargs->{'extra_args'}->{'user'} = $dbuser if $dbuser;
    $dbargs->{'extra_args'}->{'password'} = $dbpass if $dbpass;
    $dir = Text::Tradition::Directory->new( $dbargs );
    # NOTE(review): $scope is lexical to this branch and is released when
    # the branch ends -- confirm the looked-up tradition does not need it
    # to stay alive afterwards.
    my $scope = $dir->new_scope();
    $tradition = $dir->lookup( $input );
    # Fail here with a clear message instead of with a method call on an
    # undefined value further down.
    die "No tradition found in the database with ID $input\n"
        unless $tradition;
} else {
    # Build the tradition by parsing the input file in the given format.
    my %args = ( 'input' => $informat,
                 'file'  => $input );
    $args{'base'} = $inbase if $inbase;
    $args{'language'} = $language if $language;
    $args{'name'} = $name if $name;
    $args{'sep_char'} = $sep if $informat eq 'Tabular';
    ### Custom hacking for Stone
    if( $informat eq 'CollateText' ) {
        $args{'sigla'} = [ qw/ S M X V Z Bb B K W L / ];
    }
    $tradition = Text::Tradition->new( %args );
}
# Attach a stemma from the given dot file, if one was supplied.
if( $stemmafile ) {
    my $stemma = $tradition->add_stemma( dotfile => $stemmafile );
    print STDERR "Saved stemma at $stemmafile\n" if $stemma;
}
910a0a6d 98
# Emit the tradition in whichever output format was requested.
if( $outformat eq 'db' ) {
    # Store the tradition in the database. If no directory was opened
    # while loading, open one now with the 'create' flag set.
    unless( $dir ) {
        my %extra_args = ( 'create' => 1 );
        $extra_args{'user'}     = $dbuser if $dbuser;
        $extra_args{'password'} = $dbpass if $dbpass;
        $dir = Text::Tradition::Directory->new(
            'dsn'        => $dsn,
            'extra_args' => \%extra_args,
        );
    }
    my $scope = $dir->new_scope;
    # Store under the caller-supplied ID when there is one.
    my $uuid = $dbid
        ? $dir->store( $dbid => $tradition )
        : $dir->store( $tradition );
    print STDERR "Saved tradition to database with ID $uuid\n";
} elsif( $outformat eq 'stemma' ) {
    # Turn the alignment table into character data and print the result
    # of phylip_pars on it.
    my $cdata = character_input( $tradition->collation->alignment_table );
    try {
        print phylip_pars( $cdata );
    } catch( Text::Tradition::Error $e ) {
        print STDERR "Bad result: " . $e->message;
    }
} else {
    # Every remaining format maps onto a collation method named as_<format>.
    my %opts;
    $opts{'from'}   = $from if $from;
    $opts{'to'}     = $to if $to;
    $opts{'nocalc'} = 1 if $debug;
    my $output = "as_$outformat";
    print $tradition->collation->$output( \%opts );
}
131
# Print the usage summary to STDERR, followed by $msg if one is given,
# then exit. The exit status is 1 when a message (i.e. an error) was
# passed and 0 for a plain help request.
sub help {
    my( $msg ) = @_;
    print STDERR <<"EOF";
Usage: $0 -i [format] -o [format] [options] [inputfile or tradition ID]
    i, in:       Format of the input. Must be one of CollateX, CSV, CTE, KUL,
                 Self, TEI, tabular, stone, or db. For db input, the final
                 argument is the ID of the tradition to look up.
    o, out:      Format of the output. Must be one of db, graphml, svg, csv,
                 stemma, or dot.
    b, base:     Filename that contains a base text. Needed for KUL and
                 stone input.
    l, language: Language of the tradition.
    n, name:     Name of the tradition.
    s, stemma:   Dot file containing a stemma to add to the tradition.
    f, from:     Start of the subgraph to output (graphml output only).
    t, to:       End of the subgraph to output (graphml output only).
    sep:         Separator character for tabular input; 'tab' means a tab.
    dsn:         DSN of the tradition database.
    u, user:     Database user name.
    p, pass:     Database password.
    dbid:        ID under which to store the tradition (db output).
    debug:       Pass the 'nocalc' option to the output method.
    h, help:     Print this message.
EOF
    if( $msg ) {
        print STDERR "$msg\n";
    }
    exit( $msg ? 1 : 0 );
}