Commit | Line | Data |
910a0a6d |
1 | #!/usr/bin/env perl |
2 | |
3 | use lib 'lib'; |
4 | use strict; |
5 | use warnings; |
6 | use Getopt::Long; |
7 | use Text::Tradition; |
8 | use Text::Tradition::Stemma; |
9 | |
# Write UTF-8 on the standard streams. The same layer is attempted on
# $DB::OUT; the eval and "no warnings" deliberately swallow any failure,
# so this line is a no-op when that handle is not usable.
binmode STDERR, ":utf8";
binmode STDOUT, ":utf8";
eval { no warnings; binmode $DB::OUT, ":utf8"; };

# Option values with their defaults: all formats and the base filename
# start empty, --linear is on by default, and --hack is off.
my( $informat, $inbase, $outformat, $help, $linear, $HACK )
    = ( '', '', '', '', 1, 0 );

# GetOptions returns false when it meets an unknown or malformed option
# (after printing its own diagnostic). Stop with the usage text in that
# case rather than continuing with a half-parsed command line.
GetOptions(
    'i|in=s'    => \$informat,
    'b|base=s'  => \$inbase,
    'o|out=s'   => \$outformat,
    'l|linear!' => \$linear,
    'h|help'    => \$help,
    'hack'      => \$HACK,
) or help( "Could not parse the command-line options" );

# help() prints the usage text and exits.
if( $help ) {
    help();
}
28 | |
d9e873d0 |
# Every accepted spelling of the input format, keyed in lower case, mapped
# to the canonical name the rest of the script compares against and passes
# to Text::Tradition->new. Using one table for both validation and
# normalisation keeps the two from drifting apart: previously 'tabular' and
# lower-case 'csv' were accepted but never canonicalised, and 'cx' was
# canonicalised but never accepted.
my %canonical_informat = (
    'collatex' => 'CollateX',
    'cx'       => 'CollateX',
    'csv'      => 'CSV',
    'cte'      => 'CTE',
    'kul'      => 'KUL',
    'self'     => 'Self',
    'tei'      => 'TEI',
    'tab'      => 'Tabular',
    'tabular'  => 'Tabular',
);
unless( exists $canonical_informat{ lc $informat } ) {
    # help() prints the message after the usage text and exits non-zero.
    help( "Input format must be one of CollateX, CSV, CTE, KUL, Self, TEI, Tabular" );
}
$informat = $canonical_informat{ lc $informat };

# The output format is matched case-sensitively because it is later turned
# into a method name ("as_<format>"). \A and \z anchor the whole string so
# that a trailing newline cannot slip through.
unless( $outformat =~ /\A(?:graphml|svg|dot|stemma|csv)\z/ ) {
    help( "Output format must be one of graphml, svg, csv, stemma, or dot" );
}

# Do we have a base if we need it?
if( $informat eq 'KUL' && !$inbase ) {
    help( "$informat input needs a base text" );
}
47 | |
# CSV parsing requires a filename; XML parsing requires a string.
# So for KUL and CSV input $input stays the filename given on the command
# line; for every other format it is replaced by the file's full contents.
my $input = $ARGV[0];
unless( defined $input && length $input ) {
    # Without this check the failure would only surface later as an
    # "uninitialized value" warning and an unhelpful open error.
    help( "No input file was given" );
}
unless( $informat eq 'KUL' || $informat eq 'CSV' ) {
    my $infh;
    if( $input eq '-' ) {
        # The earlier two-argument open treated '-' as standard input;
        # keep that convention explicitly.
        $infh = \*STDIN;
    } else {
        # Three-argument open: the filename is never interpreted as a
        # mode or a pipe, whatever characters it contains.
        open( $infh, '<', $input )
            or die "Could not read $input: $!";
    }
    binmode $infh, ':utf8';
    $input = join( '', <$infh> );
    close $infh;
}
58 | |
# Assemble the constructor arguments for the tradition object: the input
# (a filename or the slurped text) is passed under a key named after the
# input format, next to the linear flag. A base text is added only when
# one was given on the command line.
my %args = ( 'linear' => $linear );
$args{$informat} = $input;
if( $inbase ) {
    $args{'base'} = $inbase;
}
my $tradition = Text::Tradition->new( %args );
65 | |
### Custom hacking
# Only with --hack: remove witnesses C, E, G in the Matthew text by
# deleting every path whose label is exactly one of those letters (in
# either case), then delete each reading that has neither outgoing nor
# incoming edges, reporting every deleted reading on STDERR.
if( $HACK ) {
    foreach my $path ( $tradition->collation->paths() ) {
        next unless $path->label =~ /^[ceg]$/i;
        $tradition->collation->del_path( $path );
    }
    foreach my $reading ( $tradition->collation->readings() ) {
        # Keep any reading that is still connected on either side.
        next if $reading->outgoing() || $reading->incoming();
        my $label = $reading->label;
        print STDERR "Deleting reading " . $label . "\n";
        $tradition->collation->del_reading( $reading );
    }
}
79 | |
# Now output what we have been asked to.
my $collation = $tradition->collation;
if( $outformat ne 'stemma' ) {
    # Every other format is produced by the collation method named
    # "as_<format>"; its return value is printed to STDOUT.
    my $method = "as_$outformat";
    print( $collation->$method() );
} else {
    # Stemma output goes through a Text::Tradition::Stemma object.
    # run_phylip_pars() returns a status flag and a second value, which is
    # printed to STDOUT on success and reported on STDERR otherwise.
    my $stemma = Text::Tradition::Stemma->new( 'collation' => $collation );
    my( $result, $tree ) = $stemma->run_phylip_pars();
    unless( $result ) {
        print STDERR "Bad result: $tree";
    } else {
        print( $tree );
    }
}
94 | |
# Print the usage summary to STDERR and terminate the script.
#
#   $msg - optional error message. When given (and true) it is printed
#          after the usage text and the script exits with status 1;
#          otherwise the exit status is 0.
#
# This sub never returns to its caller.
sub help {
    my( $msg ) = @_;
    # The option names and format lists below mirror the GetOptions
    # specification and the format checks made elsewhere in this script:
    # the long options are --in/--out, KUL and Tabular are accepted input
    # formats, and it is KUL input that requires a base text.
    print STDERR <<"EOF";
Usage: $0 -i [format] -o [format] (--base [filename]) (--(no)linear) [inputfile]
    i, in:     Format of the input file. Must be one of CollateX, CSV, CTE,
               KUL, Self, TEI, Tabular.
    o, out:    Format of the output. Must be one of svg, dot, graphml, csv, stemma.
    b, base:   Filename that contains a base text. Needed for KUL input.
    l, linear: Treat transposed readings separately, producing a linear graph.
               If nolinear, treat transposed readings as the same node.
    h, help:   Print this message.
EOF
    if( $msg ) {
        print STDERR "$msg\n";
    }
    exit( $msg ? 1 : 0 );
}