Commit | Line | Data |
910a0a6d |
1 | #!/usr/bin/env perl |
2 | |
3 | use lib 'lib'; |
4 | use strict; |
5 | use warnings; |
6 | use Getopt::Long; |
7 | use Text::Tradition; |
861c3e27 |
8 | use Text::Tradition::Directory; |
4e5a7b2c |
9 | use Text::Tradition::StemmaUtil; |
910a0a6d |
10 | |
# Write UTF-8 on both standard streams.  The debugger's output handle gets
# the same treatment; the eval swallows the failure when that handle is
# not open.
binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";
eval { no warnings; binmode $DB::OUT, ":utf8"; };

# Option defaults, in declaration order: no formats/base chosen, linear
# graph on, tradition named 'Tradition', hack off, tab separator, no
# stemma file, and the SQLite database used for 'db' output.
my( $informat, $inbase, $outformat, $help, $linear, $name, $HACK, $sep, $stemmafile, $dsn )
    = ( '', '', '', '', 1, 'Tradition', 0, "\t", '',
        "dbi:SQLite:dbname=stemmaweb/db/traditions.db" );

GetOptions( 'i|in=s'     => \$informat,
            'b|base=s'   => \$inbase,
            'o|out=s'    => \$outformat,
            'l|linear!'  => \$linear,
            'n|name=s'   => \$name,
            'h|help'     => \$help,
            's|stemma=s' => \$stemmafile,
            'sep=s'      => \$sep,
            'hack'       => \$HACK,
            'dsn=s'      => \$dsn,
    );

if( $help ) {
    help();
}

# Validate the input format.  The whole alternation sits inside one group
# so that both anchors apply to every alternative; 'cx' is accepted here
# because it is normalised to CollateX just below.
unless( $informat =~ /^(?:CSV|CTE|KUL|Self|TEI|CollateX|cx|tab(?:ular)?|stone)$/i ) {
    help( "Input format must be one of CollateX, CSV, CTE, KUL, Self, Tabular, TEI" );
}

# Normalise the user's spelling to the exact names compared against below
# and handed to Text::Tradition as the 'input' argument.
$informat = 'CollateX'    if $informat =~ /^c(?:ollate)?x$/i;
$informat = 'KUL'         if $informat =~ /^kul$/i;
$informat = 'CTE'         if $informat =~ /^cte$/i;
$informat = 'Self'        if $informat =~ /^self$/i;
$informat = 'TEI'         if $informat =~ /^tei$/i;
# Both 'tab' and 'tabular' pass validation, so both must be normalised;
# otherwise the sep_char argument below would be skipped for 'tabular'.
$informat = 'Tabular'     if $informat =~ /^tab(?:ular)?$/i;
$informat = 'CollateText' if $informat =~ /^stone$/i;

unless( $outformat =~ /^(graphml|svg|dot|stemma|csv|db)$/ ) {
    help( "Output format must be one of db, graphml, svg, csv, stemma, or dot" );
}

# Do we have a base if we need it?
if( $informat =~ /^(KUL|CollateText)$/ && !$inbase ) {
    help( "$informat input needs a base text" );
}

# Allow the separator to be spelled out as the word 'tab'.
$sep = "\t" if $sep eq 'tab';

# The input file is the first (and only) positional argument; without it
# there is nothing to parse, so treat its absence as a usage error.
my $input = $ARGV[0];
unless( defined $input && length $input ) {
    help( "No input file specified" );
}

# Assemble the constructor arguments for the tradition.
my %args = ( 'input'  => $informat,
             'file'   => $input,
             'linear' => $linear );
$args{'base'}     = $inbase if $inbase;
$args{'name'}     = $name   if $name;
$args{'sep_char'} = $sep    if $informat eq 'Tabular';
### Custom hacking for Stone
if( $informat eq 'CollateText' ) {
    $args{'sigla'} = [ qw/ S M X V Z Bb B K W L / ];
}
my $tradition = Text::Tradition->new( %args );

# Attach a stemma to the tradition if a stemma file was given.
if( $stemmafile ) {
    my $stemma = $tradition->add_stemma( $stemmafile );
    print STDERR "Saved stemma at $stemmafile\n" if $stemma;
}

### Custom hacking
# Remove witnesses C, E, G in the Matthew text
if( $HACK ) {
    my @togo = qw/ C E G /;
    $tradition->collation->clear_witness( @togo );
    $tradition->del_witness( @togo );
}

# Now output what we have been asked to.
if( $outformat eq 'stemma' ) {
    # Build character data from the alignment table and run it through
    # phylip_pars, which hands back a success flag plus either the tree
    # or an error description.
    my $cdata = character_input( $tradition->collation->make_alignment_table );
    my( $result, $tree ) = phylip_pars( $cdata );
    if( $result ) {
        print $tree;
    } else {
        print STDERR "Bad result: $tree";
        # Report the failure to the caller instead of exiting with 0.
        exit 1;
    }
} elsif( $outformat eq 'db' ) {
    # Store the tradition in the database named by --dsn.
    my $dir = Text::Tradition::Directory->new( 'dsn' => $dsn,
                                               'extra_args' => { 'create' => 1 } );
    # $scope is not used again, but it must stay alive until the store is done.
    my $scope = $dir->new_scope;
    my $uuid = $dir->store( $tradition );
    print STDERR "Saved tradition to database with ID $uuid\n";
} else {
    # Every remaining format maps onto a collation method named as_<format>.
    my $output = "as_$outformat";
    print $tradition->collation->$output();
}
103 | |
# Print the usage summary to STDERR, followed by $msg when one is given,
# and terminate the script.
#
#   $msg - optional error description; when present it is printed after
#          the usage text and the script exits with status 1.  Without
#          it (plain --help) the exit status is 0.
#
# Never returns.
sub help {
    my( $msg ) = @_;
    # The option names and accepted formats below mirror the GetOptions
    # specification and the format checks at the top of the script.
    print STDERR <<"EOF";
Usage: $0 -i [format] -o [format] (--base [filename]) (--(no)linear) [inputfile]
    i, in:     Format of the input file. Must be one of CollateX, CSV, CTE,
               KUL, Self, Tabular (or tab), TEI.
    o, out:    Format of the output. Must be one of db, svg, dot, graphml,
               csv, stemma.
    b, base:   Filename that contains a base text. Needed for KUL input.
    l, linear: Treat transposed readings separately, producing a linear graph.
               If nolinear, treat transposed readings as the same node.
    n, name:   Name to give the tradition. Defaults to 'Tradition'.
    s, stemma: Filename of a stemma to attach to the tradition.
    --sep:     Separator character for Tabular input; the word 'tab' is
               also accepted. Defaults to a tab.
    --dsn:     Database DSN to use for db output.
    --hack:    Remove witnesses C, E and G before output.
    h, help:   Print this message.
EOF
    if( $msg ) {
        print STDERR "$msg\n";
    }
    exit( $msg ? 1 : 0 );
}