pull the rank out of the start and end nodes
[scpubgit/stemmatology.git] / script / make_tradition.pl
CommitLineData
910a0a6d 1#!/usr/bin/env perl
2
3use lib 'lib';
4use strict;
5use warnings;
6use Getopt::Long;
7use Text::Tradition;
8use Text::Tradition::Stemma;
9
# Write UTF-8 to the standard streams.  $DB::OUT is the Perl debugger's
# output handle; the eval stops a failure there from aborting the script.
binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";
eval { no warnings; binmode $DB::OUT, ":utf8"; };

# Command-line settings with their defaults: linear graphs unless --nolinear
# is given, a tradition called 'Tradition', and a comma as the separator.
my( $informat, $inbase, $outformat, $help, $linear, $name, $HACK, $sep )
    = ( '', '', '', '', 1, 'Tradition', 0, ',' );

# Stop with the usage text when the command line cannot be parsed, rather
# than carrying on with half-set options.
GetOptions( 'i|in=s'    => \$informat,
            'b|base=s'  => \$inbase,
            'o|out=s'   => \$outformat,
            'l|linear!' => \$linear,
            # --name takes a value; as a bare flag it would set the name to 1
            # and leave the intended name behind in @ARGV.
            'n|name=s'  => \$name,
            'h|help'    => \$help,
            'sep=s'     => \$sep,
            'hack'      => \$HACK,
    ) or help( "Could not parse the command line options" );

if( $help ) {
    help();
}

# Accepted spellings of the input format, matched case-insensitively and as
# whole words, mapped to the canonical name used by the rest of the script.
my %input_format_for = (
    'collatex' => 'CollateX',
    'cx'       => 'CollateX',
    'csv'      => 'CSV',
    'cte'      => 'CTE',
    'kul'      => 'KUL',
    'self'     => 'Self',
    'tei'      => 'TEI',
    'tab'      => 'Tabular',
    'tabular'  => 'Tabular',
    'stone'    => 'CollateText',
);
my $canonical_informat = $input_format_for{ lc $informat };
unless( $canonical_informat ) {
    help( "Input format must be one of CollateX, CSV, CTE, KUL, Self, TEI, tab, stone" );
}
$informat = $canonical_informat;

unless( $outformat =~ /^(graphml|svg|dot|stemma|csv)$/ ) {
    help( "Output format must be one of graphml, svg, csv, stemma, or dot" );
}

# Do we have a base if we need it?
if( $informat =~ /^(KUL|CollateText)$/ && !$inbase ) {
    help( "$informat input needs a base text" );
}
50
fa954f4c 51
# The file to read is the first argument left over after option parsing.
my $input = $ARGV[0];

# Assemble the constructor arguments in one go.  The optional entries are
# included only when they apply: a base text and a name when they are set,
# the separator for tabular input, and a fixed list of sigla for the
# CollateText (Stone) input.
my %args = (
    'input'  => $informat,
    'file'   => $input,
    'linear' => $linear,
    ( $inbase ? ( 'base' => $inbase ) : () ),
    ( $name   ? ( 'name' => $name )   : () ),
    ( $informat eq 'Tabular'
        ? ( 'sep_char' => $sep )
        : () ),
    ### Custom hacking for Stone
    ( $informat eq 'CollateText'
        ? ( 'sigla' => [ qw/ S M X V Z Bb B K W L / ] )
        : () ),
);

my $tradition = Text::Tradition->new( %args );
67
### Custom hacking
# Only with --hack: drop witnesses C, E, G in the Matthew text, then tidy up
# whatever readings that leaves stranded.
if( $HACK ) {
    # Delete every path whose label is exactly c, e or g (either case).
    foreach my $path ( $tradition->collation->paths() ) {
        next unless $path->label =~ /^[ceg]$/i;
        $tradition->collation->del_path( $path );
    }

    # A reading with neither outgoing nor incoming connections is no longer
    # part of the graph; report it and delete it.
    foreach my $reading ( $tradition->collation->readings() ) {
        next if $reading->outgoing() || $reading->incoming();
        my $label = $reading->label;
        print STDERR "Deleting reading $label\n";
        $tradition->collation->del_reading( $reading );
    }
}
81
# Now output what we have been asked to.
if( $outformat eq 'stemma' ) {
    # Stemma output: build a stemma over the collation and run
    # run_phylip_pars().  It hands back a success flag and a second value
    # that is printed as the tree on success and, judging by its use below,
    # describes the problem on failure.
    my $stemma = Text::Tradition::Stemma->new(
        'collation' => $tradition->collation );
    my( $result, $tree ) = $stemma->run_phylip_pars();
    unless( $result ) {
        print STDERR "Bad result: $tree";
        # Signal the failure through the exit status as well, so callers
        # do not mistake an empty STDOUT for a successful run.
        exit 1;
    }
    print $tree;
} else {
    # Every other format is produced by the collation method called
    # as_<format>, e.g. as_svg or as_dot.
    my $output = "as_$outformat";
    print $tradition->collation->$output();
}
96
# help( [$msg] )
#
# Print the usage summary to STDERR and end the program.  When $msg is given
# (and true) it is printed after the usage text and the exit status is 1;
# otherwise the exit status is 0.  This sub never returns.
sub help {
    my( $msg ) = @_;
    # The option names and format lists below mirror the GetOptions spec and
    # the format checks in the main script.
    print STDERR <<"EOF";
Usage: $0 -i [format] -o [format] (--base [filename]) (--sep [char]) (--(no)linear) [inputfile]
    i, in: Format of the input file.  Must be one of CollateX, CSV, CTE, KUL,
        Self, TEI, tab, stone.
    o, out: Format of the output.  Must be one of svg, dot, graphml, csv, stemma.
    b, base: Filename that contains a base text.  Needed for KUL and stone input.
    sep: Separator character for tab (tabular) input.  Default is a comma.
    l, linear: Treat transposed readings separately, producing a linear graph.
        If nolinear, treat transposed readings as the same node.
    h, help: Print this message.
EOF
    if( $msg ) {
        print STDERR "$msg\n";
    }
    exit( $msg ? 1 : 0 );
}
113}