From: Tara L Andrews <tla@mit.edu>
Date: Tue, 31 May 2011 19:08:36 +0000 (+0200)
Subject: handle 'non 1/2' construction in lemma
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=0e96be5f070118d580e3156c5bae757a8f78f0d4;p=scpubgit%2Fstemmatology.git

handle 'non 1/2' construction in lemma
---

diff --git a/lib/Text/Tradition/Parser/CSV.pm b/lib/Text/Tradition/Parser/CSV.pm
index a4836aa..93a321e 100644
--- a/lib/Text/Tradition/Parser/CSV.pm
+++ b/lib/Text/Tradition/Parser/CSV.pm
@@ -2,6 +2,7 @@ package Text::Tradition::Parser::CSV;
 
 use strict;
 use warnings;
+use Storable qw /dclone/;
 use Text::CSV::Simple;
 
 =head1 NAME
@@ -55,33 +56,43 @@ sub read {
 	# apparatus to the list, and clear it out, if we have started a
 	# new reading.
 	if( $new_lemma ) {
-	    push( @app_list, $apparatus ) if keys %$apparatus;
-	    $apparatus = { _id => $linehash{reference},
-	    };
+	    # Was it a doubled-up apparatus entry e.g 'non (1/2)'?
+	    if( keys %$apparatus &&
+		$apparatus->{'rdg_0'} =~ /(.*?)\s+\(?([\d\/]+)\)?\s*$/ ) {
+		my( $reading, $istr ) = ( $1, $2 );
+		my @instances = split( /\//, $istr );
+		foreach my $i ( @instances ) {
+		    my $app = dclone( $apparatus );
+		    $app->{'rdg_0'} = $reading . "_$i";
+		    $app->{'_id'} .= chr(97+$i);
+		    push( @app_list, $app );
+		}
+	    } elsif( keys %$apparatus ) {
+		push( @app_list, $apparatus );
+	    }
+	    $apparatus = { _id => $linehash{reference} };
 	    $rdg_ctr = 0;
 	}
 	# The apparatus has multiple readings, and multiple witnesses per
 	# reading.  So it's a hashref whose values are listrefs.
-	$apparatus->{ 'rdg_0' } = $linehash{ 'text' } 
-        if $linehash{ 'text' };
-	$apparatus->{ 'rdg_' . ++$rdg_ctr } = $linehash{ 'variant' };
+	$apparatus->{'rdg_0'} = $linehash{'text'} if $linehash{'text'};
+	$apparatus->{'rdg_' . ++$rdg_ctr} = $linehash{'variant'};
 	foreach my $attr ( @fields[3..8] ) {
-	    $apparatus->{"_rdg_${rdg_ctr}_$attr"} = $linehash{ $attr }
-	    if $linehash{ $attr };
+	    $apparatus->{"_rdg_${rdg_ctr}_$attr"} = $linehash{$attr} if $linehash{$attr};
 	}
 	
 	foreach my $k ( @fields[10..$#fields] ) {
 	    my $variant_rdg = $linehash{$k};
 	    $k =~ s/\s+\(a\.c\.\)//;
 	    if( $variant_rdg =~ /^0/ ) {
-		$apparatus->{ $k } = 'rdg_0'
-		    unless exists $apparatus->{ $k };
+		$apparatus->{$k} = 'rdg_0'
+		    unless exists $apparatus->{$k};
 	    } elsif ( $variant_rdg =~ /^1/ ) {
-		$apparatus->{ $k } = 'rdg_' . $rdg_ctr;
+		$apparatus->{$k} = 'rdg_' . $rdg_ctr;
 	    } else { # else for $, we don't list the MS
 		warn "Unparsed variant indicator $variant_rdg for $k in " .
 		    $apparatus->{'_id'}
-	        unless ( !$variant_rdg or $variant_rdg =~ /^\$$/ );
+	            unless ( !$variant_rdg or $variant_rdg =~ /^\$$/ );
 	    }
 	}
 	# See if we have at least one reading for each variant.