Commit | Line | Data |
f6066bac |
1 | package Text::Tradition::Parser::TEI; |
2 | |
3 | use strict; |
4 | use warnings; |
00311328 |
5 | use Text::Tradition::Error; |
910a0a6d |
6 | use Text::Tradition::Parser::Util qw( collate_variants ); |
f6066bac |
7 | use XML::LibXML; |
8 | use XML::LibXML::XPathContext; |
9 | |
10 | =head1 NAME |
11 | |
12 | Text::Tradition::Parser::TEI |
13 | |
3b853983 |
14 | =head1 SYNOPSIS |
15 | |
16 | use Text::Tradition; |
17 | |
18 | my $t_from_file = Text::Tradition->new( |
19 | 'name' => 'my text', |
20 | 'input' => 'TEI', |
21 | 'file' => '/path/to/parallel_seg_file.xml' |
22 | ); |
23 | |
24 | my $t_from_string = Text::Tradition->new( |
25 | 'name' => 'my text', |
26 | 'input' => 'TEI', |
27 | 'string' => $parallel_seg_xml, |
28 | ); |
29 | |
30 | |
f6066bac |
31 | =head1 DESCRIPTION |
32 | |
3b853983 |
33 | Parser module for Text::Tradition, given a TEI parallel-segmentation file |
34 | that describes a text and its variants. Normally called upon |
35 | initialization of Text::Tradition. |
36 | |
37 | The witnesses for the tradition are taken from the <listWit/> element |
38 | within the TEI header; the readings are taken from any <p/> element that |
39 | appears in the text body (including <head/> elements therein.) |
f6066bac |
40 | |
41 | =head1 METHODS |
42 | |
e867486f |
43 | =head2 B<parse>( $tradition, $option_hash ) |
3b853983 |
44 | |
45 | Takes an initialized tradition and a set of options; creates the |
46 | appropriate nodes and edges on the graph, as well as the appropriate |
47 | witness objects. The $option_hash must contain either a 'file' or a |
48 | 'string' argument with the XML to be parsed. |
49 | |
50 | =begin testing |
51 | |
52 | use Text::Tradition; |
53 | binmode STDOUT, ":utf8"; |
54 | binmode STDERR, ":utf8"; |
55 | eval { no warnings; binmode $DB::OUT, ":utf8"; }; |
56 | |
57 | my $par_seg = 't/data/florilegium_tei_ps.xml'; |
58 | my $t = Text::Tradition->new( |
59 | 'name' => 'inline', |
60 | 'input' => 'TEI', |
61 | 'file' => $par_seg, |
62 | ); |
f6066bac |
63 | |
3b853983 |
64 | is( ref( $t ), 'Text::Tradition', "Parsed parallel-segmentation TEI" ); |
65 | if( $t ) { |
56eefa04 |
66 | is( scalar $t->collation->readings, 311, "Collation has all readings" ); |
67 | is( scalar $t->collation->paths, 361, "Collation has all paths" ); |
3b853983 |
68 | } |
f6066bac |
69 | |
f08498a5 |
70 | # Try to re-parse it, ensure we can use the parser twice in the same Perl |
71 | # invocation |
72 | |
73 | my $t2 = Text::Tradition->new( |
74 | 'name' => 'inline', |
75 | 'input' => 'TEI', |
76 | 'file' => $par_seg, |
77 | ); |
78 | |
79 | is( ref( $t2 ), 'Text::Tradition', "Parsed parallel-segmentation TEI again" ); |
80 | |
3b853983 |
81 | =end testing |
f6066bac |
82 | |
83 | =cut |
84 | |
910a0a6d |
85 | my $text = {}; # Hash of arrays, one per eventual witness we find. |
910a0a6d |
86 | my $substitutions = {}; # Keep track of merged readings |
87 | my $app_anchors = {}; # Track apparatus references |
88 | my $app_ac = {}; # Save a.c. readings |
eca16057 |
89 | my $app_count; # Keep track of how many apps we have |
910a0a6d |
90 | |
91 | # Create the package variables for tag names. |
92 | |
93 | # Would really like to do this with varname variables, but apparently this |
94 | # is considered a bad idea. The long way round then. |
f08498a5 |
95 | my( $LISTWIT, $WITNESS, $TEXT, $W, $SEG, $APP, $RDG, $LEM ); |
3b853983 |
96 | sub _make_tagnames { |
910a0a6d |
97 | my( $ns ) = @_; |
f08498a5 |
98 | ( $LISTWIT, $WITNESS, $TEXT, $W, $SEG, $APP, $RDG, $LEM ) |
99 | = ( 'listWit', 'witness', 'text', 'w', 'seg', 'app', 'rdg', 'lem' ); |
910a0a6d |
100 | if( $ns ) { |
101 | $LISTWIT = "$ns:$LISTWIT"; |
102 | $WITNESS = "$ns:$WITNESS"; |
103 | $TEXT = "$ns:$TEXT"; |
104 | $W = "$ns:$W"; |
105 | $SEG = "$ns:$SEG"; |
106 | $APP = "$ns:$APP"; |
107 | $RDG = "$ns:$RDG"; |
108 | $LEM = "$ns:$LEM"; |
109 | } |
110 | } |
111 | |
112 | # Parse the TEI file. |
f6066bac |
113 | sub parse { |
dfc37e38 |
114 | my( $tradition, $opts ) = @_; |
f6066bac |
115 | |
116 | # First, parse the XML. |
117 | my $parser = XML::LibXML->new(); |
dfc37e38 |
118 | my $doc; |
119 | if( exists $opts->{'string'} ) { |
120 | $doc = $parser->parse_string( $opts->{'string'} ); |
121 | } elsif ( exists $opts->{'file'} ) { |
122 | $doc = $parser->parse_file( $opts->{'file'} ); |
dead25ca |
123 | } elsif ( exists $opts->{'xmlobj'} ) { |
124 | $doc = $opts->{'xmlobj'}; |
dfc37e38 |
125 | } else { |
126 | warn "Could not find string or file option to parse"; |
127 | return; |
128 | } |
f6066bac |
129 | my $tei = $doc->documentElement(); |
00311328 |
130 | unless( $tei->nodeName =~ /^tei(corpus)?$/i ) { |
131 | throw( "Parsed document has non-TEI root element " . $tei->nodeName ); |
132 | } |
f2b9605f |
133 | my $xpc = XML::LibXML::XPathContext->new( $tei ); |
910a0a6d |
134 | my $ns; |
135 | if( $tei->namespaceURI ) { |
136 | $ns = 'tei'; |
137 | $xpc->registerNs( $ns, $tei->namespaceURI ); |
138 | } |
3b853983 |
139 | _make_tagnames( $ns ); |
910a0a6d |
140 | |
f6066bac |
141 | # Then get the witnesses and create the witness objects. |
910a0a6d |
142 | foreach my $wit_el ( $xpc->findnodes( "//$LISTWIT/$WITNESS" ) ) { |
143 | my $sig = $wit_el->getAttribute( 'xml:id' ); |
144 | my $source = $wit_el->toString(); |
82fa4d57 |
145 | $tradition->add_witness( sigil => $sig, sourcetype => 'collation' ); |
f6066bac |
146 | } |
910a0a6d |
147 | map { $text->{$_->sigil} = [] } $tradition->witnesses; |
eca16057 |
148 | |
910a0a6d |
149 | # Look for all word/seg node IDs and note their pre-existence. |
a7fb3133 |
150 | my @attrs = $xpc->findnodes( "//$W/attribute::xml:id" ); |
3b853983 |
151 | _save_preexisting_nodeids( @attrs ); |
910a0a6d |
152 | |
eca16057 |
153 | # Count up how many apps we have. |
154 | my @apps = $xpc->findnodes( "//$APP" ); |
155 | $app_count = scalar( @apps ); |
156 | |
910a0a6d |
157 | # Now go through the children of the text element and pull out the |
158 | # actual text. |
159 | foreach my $xml_el ( $xpc->findnodes( "//$TEXT" ) ) { |
160 | foreach my $xn ( $xml_el->childNodes ) { |
161 | _get_readings( $tradition, $xn ); |
162 | } |
163 | } |
164 | # Our $text global now has lists of readings, one per witness. |
165 | # Join them up. |
166 | my $c = $tradition->collation; |
167 | foreach my $sig ( keys %$text ) { |
910a0a6d |
168 | # Determine the list of readings for |
169 | my $sequence = $text->{$sig}; |
170 | my @real_sequence = ( $c->start ); |
171 | push( @$sequence, $c->end ); |
342c1111 |
172 | foreach( _clean_sequence( $sig, $sequence, 1 ) ) { |
173 | push( @real_sequence, _return_rdg( $_ ) ); |
910a0a6d |
174 | } |
910a0a6d |
175 | # See if we need to make an a.c. version of the witness. |
176 | if( exists $app_ac->{$sig} ) { |
177 | my @uncorrected; |
178 | push( @uncorrected, @real_sequence ); |
342c1111 |
179 | # Get rid of any remaining placeholders. |
180 | @real_sequence = _clean_sequence( $sig, \@uncorrected ); |
181 | # Do the uncorrections |
910a0a6d |
182 | foreach my $app ( keys %{$app_ac->{$sig}} ) { |
183 | my $start = _return_rdg( $app_anchors->{$app}->{$sig}->{'start'} ); |
184 | my $end = _return_rdg( $app_anchors->{$app}->{$sig}->{'end'} ); |
185 | my @new = map { _return_rdg( $_ ) } @{$app_ac->{$sig}->{$app}}; |
186 | _replace_sequence( \@uncorrected, $start, $end, @new ); |
187 | } |
342c1111 |
188 | # and record the results. |
189 | $tradition->witness( $sig )->uncorrected_path( \@uncorrected ); |
1f7aa795 |
190 | $tradition->witness( $sig )->is_layered( 1 ); |
910a0a6d |
191 | } |
342c1111 |
192 | $tradition->witness( $sig )->path( \@real_sequence ); |
910a0a6d |
193 | } |
342c1111 |
194 | # Now make our witness paths. |
195 | $tradition->collation->make_witness_paths(); |
3b853983 |
196 | |
82a45078 |
197 | unless( $opts->{'nocalc'} ) { |
198 | # Calculate the ranks for the nodes. |
199 | $tradition->collation->calculate_ranks(); |
200 | |
201 | # Now that we have ranks, see if we have distinct nodes with identical |
202 | # text and identical rank that can be merged. |
203 | $tradition->collation->flatten_ranks(); |
204 | |
205 | # And now that we've done that, calculate the common nodes. |
206 | $tradition->collation->calculate_common_readings(); |
207 | |
208 | # Save the text for each witness so that we can ensure consistency |
209 | # later on |
210 | $tradition->collation->text_from_paths(); |
211 | } |
910a0a6d |
212 | } |
213 | |
214 | sub _clean_sequence { |
342c1111 |
215 | my( $wit, $sequence, $keep_ac ) = @_; |
910a0a6d |
216 | my @clean_sequence; |
217 | foreach my $rdg ( @$sequence ) { |
218 | if( $rdg =~ /^PH-(.*)$/ ) { |
342c1111 |
219 | # It is a placeholder. Keep it only if we need it for a later |
220 | # a.c. run. |
910a0a6d |
221 | my $app_id = $1; |
342c1111 |
222 | if( $keep_ac && exists $app_ac->{$wit} && |
78fab1cf |
223 | exists $app_ac->{$wit}->{$app_id} ) { |
342c1111 |
224 | # print STDERR "Retaining empty placeholder for $app_id\n"; |
225 | push( @clean_sequence, $rdg ); |
910a0a6d |
226 | } |
227 | } else { |
228 | push( @clean_sequence, $rdg ); |
229 | } |
f6066bac |
230 | } |
910a0a6d |
231 | return @clean_sequence; |
232 | } |
f6066bac |
233 | |
910a0a6d |
234 | sub _replace_sequence { |
235 | my( $arr, $start, $end, @new ) = @_; |
236 | my( $start_idx, $end_idx ); |
237 | foreach my $i ( 0 .. $#{$arr} ) { |
342c1111 |
238 | # If $arr->[$i] is a placeholder, cope. |
239 | my $iid = ref( $arr->[$i] ) ? $arr->[$i]->id : $arr->[$i]; |
240 | $start_idx = $i if( $iid eq $start ); |
241 | if( $iid eq $end ) { |
910a0a6d |
242 | $end_idx = $i; |
243 | last; |
244 | } |
245 | } |
246 | unless( $start_idx && $end_idx ) { |
247 | warn "Could not find start and end"; |
248 | return; |
f2b9605f |
249 | } |
910a0a6d |
250 | my $length = $end_idx - $start_idx + 1; |
251 | splice( @$arr, $start_idx, $length, @new ); |
252 | } |
f6066bac |
253 | |
910a0a6d |
254 | sub _return_rdg { |
255 | my( $rdg ) = @_; |
256 | # If we were passed a reading name, return the name. If we were |
257 | # passed a reading object, return the object. |
258 | my $wantobj = ref( $rdg ) eq 'Text::Tradition::Collation::Reading'; |
259 | my $real = $rdg; |
e4b0f464 |
260 | if( exists $substitutions->{ $wantobj ? $rdg->id : $rdg } ) { |
261 | $real = $substitutions->{ $wantobj ? $rdg->id : $rdg }; |
262 | $real = $real->id unless $wantobj; |
910a0a6d |
263 | } |
264 | return $real; |
f6066bac |
265 | } |
266 | |
3b853983 |
267 | ## TODO test specific sorts of nodes of the parallel-seg XML. |
268 | |
910a0a6d |
269 | ## Recursive helper function to help us navigate through nested XML, |
270 | ## picking out the text. $tradition is the tradition, needed for |
271 | ## making readings; $xn is the XML node currently being looked at, |
272 | ## $in_var is a flag to say that we are inside a variant, $ac is a |
273 | ## flag to say that we are inside an ante-correctionem reading, and |
274 | ## @cur_wits is the list of witnesses to which this XML node applies. |
275 | ## Returns the list of readings, if any, created on the run. |
276 | |
277 | { |
3bc0cd18 |
278 | my %active_wits; |
910a0a6d |
279 | my $current_app; |
eca16057 |
280 | my $seen_apps; |
910a0a6d |
281 | |
282 | sub _get_readings { |
283 | my( $tradition, $xn, $in_var, $ac, @cur_wits ) = @_; |
3bc0cd18 |
284 | @cur_wits = grep { $active_wits{$_} } keys %active_wits unless $in_var; |
910a0a6d |
285 | |
286 | my @new_readings; |
287 | if( $xn->nodeType == XML_TEXT_NODE ) { |
288 | # Some words, thus make some readings. |
289 | my $str = $xn->data; |
290 | return unless $str =~ /\S/; # skip whitespace-only text nodes |
291 | #print STDERR "Handling text node " . $str . "\n"; |
292 | # Check that all the witnesses we have are active. |
293 | foreach my $c ( @cur_wits ) { |
3bc0cd18 |
294 | warn "$c is not among active wits" unless $active_wits{$c}; |
910a0a6d |
295 | } |
296 | $str =~ s/^\s+//; |
297 | my $final = $str =~ s/\s+$//; |
298 | foreach my $w ( split( /\s+/, $str ) ) { |
299 | # For now, skip punctuation. |
300 | next if $w !~ /[[:alnum:]]/; |
3b853983 |
301 | my $rdg = _make_reading( $tradition->collation, $w ); |
910a0a6d |
302 | push( @new_readings, $rdg ); |
910a0a6d |
303 | foreach ( @cur_wits ) { |
304 | warn "Empty wit!" unless $_; |
305 | warn "Empty reading!" unless $rdg; |
306 | push( @{$text->{$_}}, $rdg ) unless $ac; |
307 | } |
308 | } |
309 | } elsif( $xn->nodeName eq 'w' ) { |
310 | # Everything in this tag is one word. Also save any original XML ID. |
311 | #print STDERR "Handling word " . $xn->toString . "\n"; |
312 | # Check that all the witnesses we have are active. |
313 | foreach my $c ( @cur_wits ) { |
3bc0cd18 |
314 | warn "$c is not among active wits" unless $active_wits{$c}; |
910a0a6d |
315 | } |
316 | my $xml_id = $xn->getAttribute( 'xml:id' ); |
3b853983 |
317 | my $rdg = _make_reading( $tradition->collation, $xn->textContent, $xml_id ); |
910a0a6d |
318 | push( @new_readings, $rdg ); |
910a0a6d |
319 | foreach( @cur_wits ) { |
320 | warn "Empty wit!" unless $_; |
321 | warn "Empty reading!" unless $rdg; |
322 | push( @{$text->{$_}}, $rdg ) unless $ac; |
323 | } |
324 | } elsif ( $xn->nodeName eq 'app' ) { |
eca16057 |
325 | $seen_apps++; |
910a0a6d |
326 | $current_app = $xn->getAttribute( 'xml:id' ); |
327 | # print STDERR "Handling app $current_app\n"; |
328 | # Keep the reading sets in this app. |
329 | my @sets; |
330 | # Recurse through all children (i.e. rdgs) for sets of words. |
331 | foreach ( $xn->childNodes ) { |
332 | my @rdg_set = _get_readings( $tradition, $_, $in_var, $ac, @cur_wits ); |
333 | push( @sets, \@rdg_set ) if @rdg_set; |
334 | } |
335 | # Now collate these sets if we have more than one. |
336 | my $subs = collate_variants( $tradition->collation, @sets ) if @sets > 1; |
337 | map { $substitutions->{$_} = $subs->{$_} } keys %$subs; |
910a0a6d |
338 | # Return the entire set of unique readings. |
339 | my %unique; |
340 | foreach my $s ( @sets ) { |
e4b0f464 |
341 | map { $unique{$_->id} = $_ } @$s; |
910a0a6d |
342 | } |
343 | push( @new_readings, values( %unique ) ); |
344 | # Exit the current app. |
345 | $current_app = ''; |
346 | } elsif ( $xn->nodeName eq 'lem' || $xn->nodeName eq 'rdg' ) { |
347 | # Alter the current witnesses and recurse. |
348 | #print STDERR "Handling reading for " . $xn->getAttribute( 'wit' ) . "\n"; |
3bc0cd18 |
349 | # TODO handle p.c. and s.l. designations too |
910a0a6d |
350 | $ac = $xn->getAttribute( 'type' ) && $xn->getAttribute( 'type' ) eq 'a.c.'; |
3b853983 |
351 | my @rdg_wits = _get_sigla( $xn ); |
a7fb3133 |
352 | return unless @rdg_wits; # Skip readings that appear in no witnesses |
910a0a6d |
353 | my @words; |
354 | foreach ( $xn->childNodes ) { |
355 | my @rdg_set = _get_readings( $tradition, $_, 1, $ac, @rdg_wits ); |
356 | push( @words, @rdg_set ) if @rdg_set; |
357 | } |
358 | # If we have more than one word in a reading, it should become a segment. |
359 | # $tradition->collation->add_segment( @words ) if @words > 1; |
360 | |
361 | if( $ac ) { |
362 | # Add the reading set to the a.c. readings. |
363 | foreach ( @rdg_wits ) { |
364 | $app_ac->{$_}->{$current_app} = \@words; |
365 | } |
366 | } else { |
367 | # Add the reading set to the app anchors for each witness |
368 | # or put in placeholders for empty p.c. readings |
369 | foreach ( @rdg_wits ) { |
e4b0f464 |
370 | my $start = @words ? $words[0]->id : "PH-$current_app"; |
371 | my $end = @words ? $words[-1]->id : "PH-$current_app"; |
910a0a6d |
372 | $app_anchors->{$current_app}->{$_}->{'start'} = $start; |
373 | $app_anchors->{$current_app}->{$_}->{'end'} = $end; |
374 | push( @{$text->{$_}}, $start ) unless @words; |
375 | } |
376 | } |
377 | push( @new_readings, @words ); |
378 | } elsif( $xn->nodeName eq 'witStart' ) { |
379 | # Add the relevant wit(s) to the active list. |
380 | #print STDERR "Handling witStart\n"; |
3bc0cd18 |
381 | map { $active_wits{$_} = 1 } @cur_wits; |
382 | # Record a lacuna in all non-active witnesses if this is |
383 | # the first app. Get the full list from $text. |
384 | if( $seen_apps == 1 ) { |
385 | my $i = 0; |
386 | foreach my $sig ( keys %$text ) { |
387 | next if $active_wits{$sig}; |
e4b0f464 |
388 | my $l = $tradition->collation->add_reading( { |
e4b0f464 |
389 | 'id' => $current_app . "_$i", |
390 | 'is_lacuna' => 1 } ); |
3bc0cd18 |
391 | $i++; |
392 | push( @{$text->{$sig}}, $l ); |
393 | } |
394 | } |
910a0a6d |
395 | } elsif( $xn->nodeName eq 'witEnd' ) { |
396 | # Take the relevant wit(s) out of the list. |
397 | #print STDERR "Handling witEnd\n"; |
3bc0cd18 |
398 | map { $active_wits{$_} = undef } @cur_wits; |
eca16057 |
399 | # Record a lacuna, unless this is the last app. |
400 | unless( $seen_apps == $app_count ) { |
401 | foreach my $i ( 0 .. $#cur_wits ) { |
402 | my $w = $cur_wits[$i]; |
e4b0f464 |
403 | my $l = $tradition->collation->add_reading( { |
e4b0f464 |
404 | 'id' => $current_app . "_$i", |
405 | 'is_lacuna' => 1 } ); |
eca16057 |
406 | push( @{$text->{$w}}, $l ); |
407 | } |
408 | } |
a7fb3133 |
409 | } elsif( $xn->nodeName eq 'witDetail' |
410 | || $xn->nodeName eq 'note' ) { |
910a0a6d |
411 | # Ignore these for now. |
412 | return; |
413 | } else { |
414 | # Recurse as if this tag weren't there. |
415 | #print STDERR "Recursing on tag " . $xn->nodeName . "\n"; |
416 | foreach( $xn->childNodes ) { |
417 | push( @new_readings, _get_readings( $tradition, $_, $in_var, $ac, @cur_wits ) ); |
418 | } |
419 | } |
420 | return @new_readings; |
421 | } |
422 | |
423 | } |
424 | |
3b853983 |
425 | =begin testing |
426 | |
427 | use XML::LibXML; |
428 | use XML::LibXML::XPathContext; |
429 | use Text::Tradition::Parser::TEI; |
430 | |
431 | my $xml_str = '<tei><rdg wit="#A #B #C #D">some text</rdg></tei>'; |
432 | my $el = XML::LibXML->new()->parse_string( $xml_str )->documentElement; |
433 | my $xpc = XML::LibXML::XPathContext->new( $el ); |
434 | my $obj = $xpc->find( '//rdg' ); |
435 | |
436 | my @wits = Text::Tradition::Parser::TEI::_get_sigla( $obj ); |
437 | is( join( ' ', @wits) , "A B C D", "correctly parsed reading wit string" ); |
438 | |
439 | =end testing |
440 | |
441 | =cut |
442 | |
910a0a6d |
443 | # Helper to extract a list of witness sigla from a reading element. |
3b853983 |
444 | sub _get_sigla { |
f6066bac |
445 | my( $rdg ) = @_; |
446 | # Cope if we have been handed a NodeList. There is only |
447 | # one reading here. |
448 | if( ref( $rdg ) eq 'XML::LibXML::NodeList' ) { |
910a0a6d |
449 | $rdg = $rdg->shift; |
f6066bac |
450 | } |
451 | |
452 | my @wits; |
453 | if( ref( $rdg ) eq 'XML::LibXML::Element' ) { |
910a0a6d |
454 | my $witstr = $rdg->getAttribute( 'wit' ); |
a7fb3133 |
455 | return () unless $witstr; |
910a0a6d |
456 | $witstr =~ s/^\s+//; |
457 | $witstr =~ s/\s+$//; |
458 | @wits = split( /\s+/, $witstr ); |
459 | map { $_ =~ s/^\#// } @wits; |
f6066bac |
460 | } |
461 | return @wits; |
462 | } |
463 | |
910a0a6d |
464 | # Helper with its counters to actually make the readings. |
f2b9605f |
465 | { |
466 | my $word_ctr = 0; |
467 | my %used_nodeids; |
468 | |
3b853983 |
469 | sub _save_preexisting_nodeids { |
910a0a6d |
470 | foreach( @_ ) { |
471 | $used_nodeids{$_->getValue()} = 1; |
472 | } |
473 | } |
474 | |
3b853983 |
475 | sub _make_reading { |
910a0a6d |
476 | my( $graph, $word, $xml_id ) = @_; |
477 | if( $xml_id ) { |
478 | if( exists $used_nodeids{$xml_id} ) { |
479 | if( $used_nodeids{$xml_id} != 1 ) { |
480 | warn "Already used assigned XML ID somewhere else!"; |
481 | $xml_id = undef; |
482 | } |
483 | } else { |
484 | warn "Undetected pre-existing XML ID"; |
485 | } |
486 | } |
487 | if( !$xml_id ) { |
488 | until( $xml_id ) { |
489 | my $try_id = 'w'.$word_ctr++; |
490 | next if exists $used_nodeids{$try_id}; |
491 | $xml_id = $try_id; |
492 | } |
493 | } |
e4b0f464 |
494 | my $rdg = $graph->add_reading( |
49d4f2ac |
495 | { 'id' => $xml_id, |
e4b0f464 |
496 | 'text' => $word } |
497 | ); |
910a0a6d |
498 | $used_nodeids{$xml_id} = $rdg; |
499 | return $rdg; |
f2b9605f |
500 | } |
501 | } |
502 | |
f6066bac |
503 | 1; |
3b853983 |
504 | |
00311328 |
505 | sub throw { |
506 | Text::Tradition::Error->throw( |
507 | 'ident' => 'Parser::TEI error', |
508 | 'message' => $_[0], |
509 | ); |
510 | } |
511 | |
3b853983 |
512 | =head1 BUGS / TODO |
513 | |
514 | =over |
515 | |
516 | =item * More unit testing |
517 | |
e867486f |
518 | =item * Handle special designations apart from a.c. |
519 | |
520 | =item * Mark common nodes within collated variants |
521 | |
3b853983 |
522 | =back |
523 | |
524 | =head1 LICENSE |
525 | |
526 | This package is free software and is provided "as is" without express |
527 | or implied warranty. You can redistribute it and/or modify it under |
528 | the same terms as Perl itself. |
529 | |
530 | =head1 AUTHOR |
531 | |
532 | Tara L Andrews E<lt>aurum@cpan.orgE<gt> |