allow stemma dot to be file or string
[scpubgit/stemmatology.git] / script / make_tradition.pl
CommitLineData
910a0a6d 1#!/usr/bin/env perl
2
3use lib 'lib';
4use strict;
5use warnings;
6use Getopt::Long;
7use Text::Tradition;
861c3e27 8use Text::Tradition::Directory;
4e5a7b2c 9use Text::Tradition::StemmaUtil;
910a0a6d 10
# Write diagnostics and regular output as UTF-8.
binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";
# Try the same for the debugger's output handle; any failure is swallowed
# by the eval so that it never stops the script.
eval { no warnings; binmode $DB::OUT, ":utf8"; };

# Command-line settings together with their defaults.
my $informat   = '';            # -i / --in
my $inbase     = '';            # -b / --base
my $outformat  = '';            # -o / --out
my $help       = '';            # -h / --help
my $linear     = 1;             # -l / --linear, negatable as --nolinear
my $name       = 'Tradition';   # -n / --name
my $HACK       = 0;             # --hack
my $sep        = "\t";          # --sep
my $stemmafile = '';            # -s / --stemma
my $dsn        = "dbi:SQLite:dbname=stemmaweb/db/traditions.db";   # --dsn

# GetOptions returns false when it meets an unknown or malformed option.
# Stop with the usage text in that case instead of silently carrying on
# with the defaults.
GetOptions( 'i|in=s'     => \$informat,
            'b|base=s'   => \$inbase,
            'o|out=s'    => \$outformat,
            'l|linear!'  => \$linear,
            'n|name=s'   => \$name,
            'h|help'     => \$help,
            's|stemma=s' => \$stemmafile,
            'sep=s'      => \$sep,
            'hack'       => \$HACK,
            'dsn=s'      => \$dsn,
    ) or help( "Invalid command-line options" );
30
# -h / --help: print the usage text and exit.
if( $help ) {
    help();
}

# Validate the input format. The whole alternation is grouped so that both
# anchors apply to every alternative; 'cx' is accepted here because it is
# normalised to CollateX just below.
unless( $informat =~ /^(?:CSV|CTE|KUL|Self|TEI|CollateX|cx|tab(?:ular)?|stone)$/i ) {
    help( "Input format must be one of CollateX, CSV, CTE, KUL, Self, TEI, tab, stone" );
}
# Normalise the case-insensitive user input to the canonical format names
# that the rest of the script compares against.
# NOTE(review): CSV is the only accepted format that is not case-normalised
# here; confirm whether the parser expects a particular spelling.
$informat = 'CollateX' if $informat =~ /^c(?:ollate)?x$/i;
$informat = 'KUL' if $informat =~ /^kul$/i;
$informat = 'CTE' if $informat =~ /^cte$/i;
$informat = 'Self' if $informat =~ /^self$/i;
$informat = 'TEI' if $informat =~ /^tei$/i;
# Both spellings pass the validation above, so both must be normalised.
$informat = 'Tabular' if $informat =~ /^tab(?:ular)?$/i;
$informat = 'CollateText' if $informat =~ /^stone$/i;

unless( $outformat =~ /^(graphml|svg|dot|stemma|csv|db)$/ ) {
    help( "Output format must be one of db, graphml, svg, csv, stemma, or dot" );
}

# Do we have a base if we need it?
if( $informat =~ /^(KUL|CollateText)$/ && !$inbase ) {
    help( "$informat input needs a base text" );
}
# Allow the literal word 'tab' as a shell-friendly way to ask for a tab.
$sep = "\t" if $sep eq 'tab';
fa954f4c 55
# The file to read is the first positional argument. Without it there is
# nothing to parse, so stop with the usage text instead of handing an
# undefined filename to the constructor.
my $input = $ARGV[0];
help( "No input file specified" ) unless defined $input && length $input;

# Assemble the constructor arguments for the tradition.
my %args = ( 'input'  => $informat,
             'file'   => $input,
             'linear' => $linear );
$args{'base'} = $inbase if $inbase;
$args{'name'} = $name if $name;
# The separator is only passed on for tabular input.
$args{'sep_char'} = $sep if $informat eq 'Tabular';
### Custom hacking for Stone
if( $informat eq 'CollateText' ) {
    $args{'sigla'} = [ qw/ S M X V Z Bb B K W L / ];
}
my $tradition = Text::Tradition->new( %args );

# Optionally attach a stemma to the freshly built tradition.
if( $stemmafile ) {
    # NOTE(review): the filename is passed as a single positional argument;
    # confirm this matches the calling convention add_stemma expects.
    my $stemma = $tradition->add_stemma( $stemmafile );
    print STDERR "Saved stemma at $stemmafile\n" if $stemma;
}
910a0a6d 75
### Custom hacking
# With --hack, witnesses C, E and G (Matthew text) are removed: first their
# data is cleared from the collation, then they are deleted from the
# tradition itself.
if( $HACK ) {
    my @unwanted = ( 'C', 'E', 'G' );
    my $collation = $tradition->collation;
    $collation->clear_witness( @unwanted );
    $tradition->del_witness( @unwanted );
}
83
# Finally, produce whatever output format was requested.
if( $outformat eq 'db' ) {
    # Store the tradition in the directory identified by the DSN.
    my $directory = Text::Tradition::Directory->new(
        'dsn'        => $dsn,
        'extra_args' => { 'create' => 1 },
    );
    # The scope object is not used directly, but it has to stay alive
    # until the store call has finished.
    my $scope = $directory->new_scope;
    my $id = $directory->store( $tradition );
    print STDERR "Saved tradition to database with ID $id\n";
} elsif( $outformat eq 'stemma' ) {
    # Turn the alignment table into character data and hand it to
    # phylip_pars, which returns a success flag plus either the tree or
    # an error text.
    my $chardata = character_input( $tradition->collation->make_alignment_table );
    my( $ok, $payload ) = phylip_pars( $chardata );
    if( !$ok ) {
        print STDERR "Bad result: $payload";
    } else {
        print $payload;
    }
} else {
    # Every other format maps onto an as_<format> method of the collation.
    my $method = 'as_' . $outformat;
    print $tradition->collation->$method();
}
103
# Print the usage summary to STDERR, followed by an optional error message,
# and terminate the script.
#
#   $msg - optional error text. When given, it is printed after the usage
#          summary and the script exits with status 1; otherwise the exit
#          status is 0.
#
# This sub never returns.
sub help {
    my( $msg ) = @_;
    # The option names below are the ones actually registered with
    # GetOptions (in/out rather than input/output).
    print STDERR <<"EOF";
Usage: $0 -i [format] -o [format] (--base [filename]) (--(no)linear) [inputfile]
    i, in:     Format of the input file. Must be one of CollateX, CSV, CTE,
               KUL, Self, TEI, tab, stone.
    o, out:    Format of the output. Must be one of db, graphml, svg, dot,
               csv, stemma.
    b, base:   Filename that contains a base text. Needed for KUL and stone input.
    l, linear: Treat transposed readings separately, producing a linear graph.
               If nolinear, treat transposed readings as the same node.
    n, name:   Name to give the tradition (default 'Tradition').
    s, stemma: Stemma file to add to the tradition.
    sep:       Field separator for tab input; 'tab' means a tab character
               (the default).
    dsn:       Data source name used for db output
               (default dbi:SQLite:dbname=stemmaweb/db/traditions.db).
    hack:      Remove witnesses C, E and G from the tradition.
    h, help:   Print this message.
EOF
    if( $msg ) {
        print STDERR "$msg\n";
    }
    exit( $msg ? 1 : 0 );
}