#!/usr/bin/env perl

# Command-line converter for text traditions.
#
# Reads a tradition either by parsing an input file in the format given with
# -i, or by looking it up in a database (-i db), optionally attaches a stemma
# from a dot file, and then writes it out in the format given with -o: one of
# the collation's as_* serialisations, a stemma computed via phylip_pars, or a
# database store.

use lib 'lib';
use strict;
use warnings;
use Getopt::Long;
use TryCatch;
use Text::Tradition;
use Text::Tradition::Directory;
use Text::Tradition::StemmaUtil qw/ character_input phylip_pars newick_to_svg /;

binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";
# Best effort only: failures (and warnings) from touching the debugger's
# output handle are deliberately ignored.
eval { no warnings; binmode $DB::OUT, ":utf8"; };

# Variables with defaults
my( $informat, $outformat, $language, $name, $sep, $dsn ) = ( '', '', 'Default',
    'Tradition', "\t", "dbi:SQLite:dbname=db/traditions.db" );
# Variables with no default
my( $inbase, $help, $stemmafile, $dbuser, $dbpass, $from, $to, $dbid,
    $nocalc, $nonlinear );

# GetOptions returns false when the command line could not be parsed (unknown
# option, missing value); stop with the usage message instead of carrying on
# with a half-parsed configuration.
GetOptions( 'i|in=s'        => \$informat,
            'b|base=s'      => \$inbase,
            'o|out=s'       => \$outformat,
            'l|language=s'  => \$language,
            'n|name=s'      => \$name,
            'h|help'        => \$help,
            's|stemma=s'    => \$stemmafile,
            'u|user=s'      => \$dbuser,
            'p|pass=s'      => \$dbpass,
            'f|from=s'      => \$from,
            't|to=s'        => \$to,
            'nl|nonlinear'  => \$nonlinear,
            'sep=s'         => \$sep,
            'dsn=s'         => \$dsn,
            'dbid=s'        => \$dbid,
            'nc|nocalc'     => \$nocalc,
    ) or help( "Could not parse the command-line options" );

if( $help ) {
    help();
}

# Validate the input format. The whole alternation is grouped so that both
# anchors apply to every alternative; otherwise strings such as "CTEfoo" or
# "foodb" slip through. "cx" is accepted because it is normalised to CollateX
# just below.
unless( $informat =~ /^(?:CTE|KUL|Self|TEI|CollateX|cx|tab(?:ular)?|xlsx?|db)$/i ) {
    help( "Input format must be one of CollateX, CTE, Self, TEI, Tabular, XLS(X), or DB" );
}

# Remember the spreadsheet flavour (xls / xlsx) before the format name is
# folded into 'Tabular'.
my $excel = $informat =~ /^xls/i ? lc( $informat ) : undef;

# Normalise the format name to the exact spelling compared against below and
# handed to Text::Tradition as 'input'.
$informat = 'CollateX' if $informat =~ /^c(ollate)?x$/i;
$informat = 'KUL' if $informat =~ /^kul$/i;
$informat = 'CTE' if $informat =~ /^cte$/i;
$informat = 'Self' if $informat =~ /^self$/i;
$informat = 'TEI' if $informat =~ /^tei$/i;
# Both "tab" and "tabular", in any case, select the tabular parser.
$informat = 'Tabular' if $informat =~ /^tab(ular)?$/i;
# NOTE(review): 'stone' is not accepted by the validation above, so this
# mapping (and the CollateText handling further down) is currently
# unreachable - confirm whether the format should be re-enabled or removed.
$informat = 'CollateText' if $informat =~ /^stone$/i;
$informat = 'Tabular' if $informat =~ /^xls/i;
# The validation is case-insensitive, but the comparison below is not.
$informat = 'db' if $informat =~ /^db$/i;

unless( $outformat =~ /^(graphml|svg|dot|adj(acency)?|stemma(svg)?|(c|t)sv|db)$/i ) {
    help( "Output format must be one of db, graphml, svg, csv, tsv, stemma, adjacency, or dot" );
}
# Everything after this point compares the output format case-sensitively and
# uses it to build the as_* method name, so fold it to lower case once here.
$outformat = lc( $outformat );
$outformat = 'adjacency_list' if $outformat =~ /^adj/;

if( $from || $to ) {
    help( "Subgraphs only supported in GraphML, dot, adjacency, or SVG format" )
        unless $outformat =~ /^(graphml|dot|svg|adjacency_list)$/;
}

# Do we have a base if we need it?
if( $informat =~ /^(KUL|CollateText)$/ && !$inbase ) {
    help( "$informat input needs a base text" );
}
$sep = "\t" if $sep eq 'tab';

# The single positional argument is the input file, or the tradition ID when
# reading from the database.
my $input = $ARGV[0];
unless( defined $input && length $input ) {
    help( "No input file or tradition ID was given" );
}

my $tradition;
my $dir;
if( $informat eq 'db' ) {
    my $dbargs = { dsn => $dsn };
    $dbargs->{'extra_args'}->{'user'} = $dbuser if $dbuser;
    $dbargs->{'extra_args'}->{'password'} = $dbpass if $dbpass;
    $dir = Text::Tradition::Directory->new( $dbargs );
    my $scope = $dir->new_scope();
    $tradition = $dir->lookup( $input );
    # Fail here with a clear message rather than later with a method call
    # on an undefined value.
    unless( $tradition ) {
        print STDERR "No tradition found in the database for ID $input\n";
        exit 1;
    }
} else {
    # First: read the base. Make a graph, but also note which
    # nodes represent line beginnings.
    my %args = ( 'input' => $informat,
                 'file'  => $input );
    $args{'linear'} = 0 if $nonlinear;
    $args{'base'} = $inbase if $inbase;
    $args{'language'} = $language if $language;
    $args{'name'} = $name if $name;
    $args{'nocalc'} = 1 if $nocalc;
    if( $informat eq 'Tabular' ) {
        if( $excel ) {
            $args{'excel'} = $excel;
        } else {
            $args{'sep_char'} = $sep;
        }
    }
    # If we are writing to the database, use that DB as the userstore.
    if( $outformat eq 'db' ) {
        unless( $dir ) {
            my $extra_args = { 'create' => 1 };
            $extra_args->{'user'} = $dbuser if $dbuser;
            $extra_args->{'password'} = $dbpass if $dbpass;
            $dir = Text::Tradition::Directory->new( 'dsn' => $dsn,
                'extra_args' => $extra_args );
        }
        $args{'userstore'} = $dir;
    }
    ### Custom hacking for Stone
    if( $informat eq 'CollateText' ) {
        $args{'sigla'} = [ qw/ S M X V Z Bb B K W L / ];
    }
    # Declare first, assign conditionally: "my ... if ..." has undefined
    # behaviour in Perl. The scope still lives until the end of this block.
    my $scope;
    $scope = $dir->new_scope() if $dir;
    $tradition = Text::Tradition->new( %args );
}
if( $stemmafile ) {
    my $stemma = $tradition->add_stemma( dotfile => $stemmafile );
    print STDERR "Saved stemma at $stemmafile\n" if $stemma;
}

# Now output what we have been asked to.
if( $outformat =~ /^stemma(.*)$/ ) {
    # "stemma" prints the Newick tree; "stemmasvg" renders it to SVG.
    my $type = $1 || 'newick';
    my $cdata = character_input( $tradition );
    my $newick;
    try {
        $newick = phylip_pars( $cdata );
    } catch( Text::Tradition::Error $e ) {
        print STDERR "Bad result from pars: " . $e->message;
        # This is a failure, so do not report success to the caller.
        exit 1;
    }
    if( $type eq 'newick' ) {
        print $newick;
    } elsif( $type eq 'svg' ) {
        print newick_to_svg( $newick );
    }
} elsif( $outformat eq 'db' ) {
    # $dir is always set by now: either the tradition was read from the
    # database, or the directory was opened above to act as the userstore.
    my $scope = $dir->new_scope;
    my $uuid;
    if( $dbid ) {
        $uuid = $dir->store( $dbid => $tradition );
    } else {
        $uuid = $dir->store( $tradition );
    }
    print STDERR "Saved tradition to database with ID $uuid\n";
} else {
    # Every remaining format maps onto an as_<format> method of the collation.
    my $output = "as_$outformat";
    my $opts = {};
    $opts->{'from'} = $from if $from;
    $opts->{'to'} = $to if $to;
    $opts->{'nocalc'} = 1 if $nocalc;
    print $tradition->collation->$output( $opts );
}

# Print the usage summary to STDERR, followed by $msg when one is given, and
# terminate the program. Never returns.
#
#   $msg - optional error message. When present the exit status is 1,
#          otherwise (plain --help) it is 0.
#
# The option list below mirrors the GetOptions specification of this script;
# keep the two in step when options are added or renamed.
sub help {
    my( $msg ) = @_;
    print STDERR <<"EOF";
Usage: $0 -i [format] -o [format] [options] [inputfile or tradition ID]
    i, in: Format of the input. Must be one of CollateX, CTE, KUL, Self, TEI,
        Tabular, XLS, XLSX, or DB. For DB the argument is the ID to look up.
    o, out: Format of the output. Must be one of db, graphml, svg, dot, csv,
        tsv, adjacency, stemma, or stemmasvg.
    b, base: Filename that contains a base text. Needed for KUL input.
    l, language: Language of the tradition. Default 'Default'.
    n, name: Name of the tradition. Default 'Tradition'.
    s, stemma: Dot file containing a stemma to add to the tradition.
    f, from: Start point of a subgraph (graphml, dot, svg, adjacency only).
    t, to: End point of a subgraph (graphml, dot, svg, adjacency only).
    nl, nonlinear: Treat transposed readings as the same node, producing a
        non-linear graph.
    nc, nocalc: Set the 'nocalc' flag for parsing and output.
    sep: Field separator for Tabular input; 'tab' means a tab character,
        which is the default.
    dsn: DSN of the database. Default dbi:SQLite:dbname=db/traditions.db
    u, user: Database user name.
    p, pass: Database password.
    dbid: ID under which to store the tradition when the output is db.
    h, help: Print this message.
EOF
    if( $msg ) {
        print STDERR "$msg\n";
    }
    exit( $msg ? 1 : 0 );
}