exercises for part 1
[gitmo/moose-presentations.git] / moose-class / exercises / t / lib / Lingua / EN / Inflect.pm
CommitLineData
ddd87d75 1package Lingua::EN::Inflect;
2
3use strict;
4use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA);
5use Env;
6
7require Exporter;
8@ISA = qw(Exporter);
9
10$VERSION = '1.89';
11
12%EXPORT_TAGS =
13(
14 ALL => [ qw( classical inflect
15 PL PL_N PL_V PL_ADJ NO NUM A AN
16 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
17 PART_PRES
18 ORD
19 NUMWORDS
20 def_noun def_verb def_adj def_a def_an )],
21
22 INFLECTIONS => [ qw( classical inflect
23 PL PL_N PL_V PL_ADJ PL_eq
24 NO NUM A AN PART_PRES )],
25
26 PLURALS => [ qw( classical inflect
27 PL PL_N PL_V PL_ADJ NO NUM
28 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )],
29
30 COMPARISONS => [ qw( classical
31 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )],
32
33 ARTICLES => [ qw( classical inflect NUM A AN )],
34
35 NUMERICAL => [ qw( ORD NUMWORDS )],
36
37 USER_DEFINED => [ qw( def_noun def_verb def_adj def_a def_an )],
38);
39
40Exporter::export_ok_tags(qw( ALL ));
41
42# SUPPORT CLASSICAL PLURALIZATIONS
43
44my %def_classical = (
45 all => 0,
46 zero => 0,
47 herd => 0,
48 names => 1,
49 persons => 0,
50 ancient => 0,
51);
52
53my %all_classical = (
54 all => 1,
55 zero => 1,
56 herd => 1,
57 names => 1,
58 persons => 1,
59 ancient => 1,
60);
61
62my %classical = %def_classical;
63
64my $classical_mode = join '|', keys %all_classical;
65 $classical_mode = qr/^(?:$classical_mode)$/;
66
67sub classical
68{
69 if (!@_) {
70 %classical = %all_classical;
71 return;
72 }
73 if (@_==1 && $_[0] !~ $classical_mode) {
74 %classical = $_[0] ? %all_classical : ();
75 return;
76 }
77 while (@_) {
78 my $arg = shift;
79 if ($arg !~ $classical_mode) {
80 die "Unknown classical mode ($arg)\n";
81 }
82 if (@_ && $_[0] !~ $classical_mode) { $classical{$arg} = shift; }
83 else { $classical{$arg} = 1; }
84
85 if ($arg eq 'all') {
86 %classical = $classical{all} ? %all_classical : ();
87 }
88 }
89}
90
91my $persistent_count;
92
93sub NUM # (;$count,$show)
94{
95 if (defined $_[0])
96 {
97 $persistent_count = $_[0];
98 return $_[0] if !defined($_[1]) || $_[1];
99 }
100 else
101 {
102 $persistent_count = undef;
103 }
104 return '';
105}
106
107
108# 0. PERFORM GENERAL INFLECTIONS IN A STRING
109
110sub enclose { "(?:$_[0])" }
111
112sub inflect
113{
114 my $save_persistent_count = $persistent_count;
115 my @sections = split /(NUM\([^)]*\))/, $_[0];
116 my $inflection = "";
117
118 foreach ( @sections )
119 {
120 unless (s/NUM\(\s*?(?:([^),]*)(?:,([^)]*))?)?\)/ NUM($1,$2) /xe)
121 {
122 1 while
123 s/\bPL \( ([^),]*) (, ([^)]*) )? \) / PL($1,$3) /xeg
124 || s/\bPL_N \( ([^),]*) (, ([^)]*) )? \) / PL_N($1,$3) /xeg
125 || s/\bPL_V \( ([^),]*) (, ([^)]*) )? \) / PL_V($1,$3) /xeg
126 || s/\bPL_ADJ \( ([^),]*) (, ([^)]*) )? \) / PL_ADJ($1,$3) /xeg
127 || s/\bAN? \( ([^),]*) (, ([^)]*) )? \) / A($1,$3) /xeg
128 || s/\bNO \( ([^),]*) (, ([^)]*) )? \) / NO($1,$3) /xeg
129 || s/\bORD \( ([^)]*) \) / ORD($1) /xeg
130 || s/\bNUMWORDS \( ([^)]*) \) / NUMWORDS($1) /xeg
131 || s/\bPART_PRES \( ([^)]*) \) / PART_PRES($1) /xeg
132 }
133
134 $inflection .= $_;
135 }
136
137 $persistent_count = $save_persistent_count;
138 return $inflection;
139}
140
141
142# 1. PLURALS
143
144my %PL_sb_irregular_s =
145(
146 "corpus" => "corpuses|corpora",
147 "opus" => "opuses|opera",
148 "genus" => "genera",
149 "mythos" => "mythoi",
150 "penis" => "penises|penes",
151 "testis" => "testes",
152 "atlas" => "atlases|atlantes",
153);
154
155my %PL_sb_irregular =
156(
157 "child" => "children",
158 "brother" => "brothers|brethren",
159 "loaf" => "loaves",
160 "hoof" => "hoofs|hooves",
161 "beef" => "beefs|beeves",
162 "money" => "monies",
163 "mongoose" => "mongooses",
164 "ox" => "oxen",
165 "cow" => "cows|kine",
166 "soliloquy" => "soliloquies",
167 "graffito" => "graffiti",
168 "prima donna" => "prima donnas|prime donne",
169 "octopus" => "octopuses|octopodes",
170 "genie" => "genies|genii",
171 "ganglion" => "ganglions|ganglia",
172 "trilby" => "trilbys",
173 "turf" => "turfs|turves",
174 "numen" => "numina",
175 "atman" => "atmas",
176 "occiput" => "occiputs|occipita",
177
178 %PL_sb_irregular_s,
179);
180
181my $PL_sb_irregular = enclose join '|', keys %PL_sb_irregular;
182
183# CLASSICAL "..is" -> "..ides"
184
185my @PL_sb_C_is_ides =
186(
187# GENERAL WORDS...
188
189 "ephemeris", "iris", "clitoris",
190 "chrysalis", "epididymis",
191
192# INFLAMATIONS...
193
194 ".*itis",
195
196);
197
198my $PL_sb_C_is_ides = enclose join "|", map { substr($_,0,-2) } @PL_sb_C_is_ides;
199
200# CLASSICAL "..a" -> "..ata"
201
202my @PL_sb_C_a_ata =
203(
204 "anathema", "bema", "carcinoma", "charisma", "diploma",
205 "dogma", "drama", "edema", "enema", "enigma", "lemma",
206 "lymphoma", "magma", "melisma", "miasma", "oedema",
207 "sarcoma", "schema", "soma", "stigma", "stoma", "trauma",
208 "gumma", "pragma",
209);
210
211my $PL_sb_C_a_ata = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_a_ata;
212
213# UNCONDITIONAL "..a" -> "..ae"
214
215my $PL_sb_U_a_ae = enclose join "|",
216(
217 "alumna", "alga", "vertebra", "persona"
218);
219
220# CLASSICAL "..a" -> "..ae"
221
222my $PL_sb_C_a_ae = enclose join "|",
223(
224 "amoeba", "antenna", "formula", "hyperbola",
225 "medusa", "nebula", "parabola", "abscissa",
226 "hydra", "nova", "lacuna", "aurora", ".*umbra",
227 "flora", "fauna",
228);
229
230# CLASSICAL "..en" -> "..ina"
231
232my $PL_sb_C_en_ina = enclose join "|", map { substr($_,0,-2) }
233(
234 "stamen", "foramen", "lumen"
235);
236
237# UNCONDITIONAL "..um" -> "..a"
238
239my $PL_sb_U_um_a = enclose join "|", map { substr($_,0,-2) }
240(
241 "bacterium", "agendum", "desideratum", "erratum",
242 "stratum", "datum", "ovum", "extremum",
243 "candelabrum",
244);
245
246# CLASSICAL "..um" -> "..a"
247
248my $PL_sb_C_um_a = enclose join "|", map { substr($_,0,-2) }
249(
250 "maximum", "minimum", "momentum", "optimum",
251 "quantum", "cranium", "curriculum", "dictum",
252 "phylum", "aquarium", "compendium", "emporium",
253 "enconium", "gymnasium", "honorarium", "interregnum",
254 "lustrum", "memorandum", "millennium", "rostrum",
255 "spectrum", "speculum", "stadium", "trapezium",
256 "ultimatum", "medium", "vacuum", "velum",
257 "consortium",
258);
259
260# UNCONDITIONAL "..us" -> "i"
261
262my $PL_sb_U_us_i = enclose join "|", map { substr($_,0,-2) }
263(
264 "alumnus", "alveolus", "bacillus", "bronchus",
265 "locus", "nucleus", "stimulus", "meniscus",
266);
267
268# CLASSICAL "..us" -> "..i"
269
270my $PL_sb_C_us_i = enclose join "|", map { substr($_,0,-2) }
271(
272 "focus", "radius", "genius",
273 "incubus", "succubus", "nimbus",
274 "fungus", "nucleolus", "stylus",
275 "torus", "umbilicus", "uterus",
276 "hippopotamus",
277);
278
279# CLASSICAL "..us" -> "..us" (ASSIMILATED 4TH DECLENSION LATIN NOUNS)
280
281my $PL_sb_C_us_us = enclose join "|",
282(
283 "status", "apparatus", "prospectus", "sinus",
284 "hiatus", "impetus", "plexus",
285);
286
287# UNCONDITIONAL "..on" -> "a"
288
289my $PL_sb_U_on_a = enclose join "|", map { substr($_,0,-2) }
290(
291 "criterion", "perihelion", "aphelion",
292 "phenomenon", "prolegomenon", "noumenon",
293 "organon", "asyndeton", "hyperbaton",
294);
295
296# CLASSICAL "..on" -> "..a"
297
298my $PL_sb_C_on_a = enclose join "|", map { substr($_,0,-2) }
299(
300 "oxymoron",
301);
302
303# CLASSICAL "..o" -> "..i" (BUT NORMALLY -> "..os")
304
305my @PL_sb_C_o_i =
306(
307 "solo", "soprano", "basso", "alto",
308 "contralto", "tempo", "piano", "virtuoso",
309);
310my $PL_sb_C_o_i = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_o_i;
311
312# ALWAYS "..o" -> "..os"
313
314my $PL_sb_U_o_os = enclose join "|",
315(
316 "albino", "archipelago", "armadillo",
317 "commando", "crescendo", "fiasco",
318 "ditto", "dynamo", "embryo",
319 "ghetto", "guano", "inferno",
320 "jumbo", "lumbago", "magneto",
321 "manifesto", "medico", "octavo",
322 "photo", "pro", "quarto",
323 "canto", "lingo", "generalissimo",
324 "stylo", "rhino", "casino",
325 "auto", "macro", 'zero',
326
327 @PL_sb_C_o_i,
328);
329
330
331# UNCONDITIONAL "..[ei]x" -> "..ices"
332
333my $PL_sb_U_ex_ices = enclose join "|", map { substr($_,0,-2) }
334(
335 "codex", "murex", "silex",
336);
337
338my $PL_sb_U_ix_ices = enclose join "|", map { substr($_,0,-2) }
339(
340 "radix", "helix",
341);
342
343# CLASSICAL "..[ei]x" -> "..ices"
344
345my $PL_sb_C_ex_ices = enclose join "|", map { substr($_,0,-2) }
346(
347 "vortex", "vertex", "cortex", "latex",
348 "pontifex", "apex", "index", "simplex",
349);
350
351my $PL_sb_C_ix_ices = enclose join "|", map { substr($_,0,-2) }
352(
353 "appendix",
354);
355
356# ARABIC: ".." -> "..i"
357
358my $PL_sb_C_i = enclose join "|",
359(
360 "afrit", "afreet", "efreet",
361);
362
363# HEBREW: ".." -> "..im"
364
365my $PL_sb_C_im = enclose join "|",
366(
367 "goy", "seraph", "cherub",
368);
369
370# UNCONDITIONAL "..man" -> "..mans"
371
372my $PL_sb_U_man_mans = enclose join "|",
373qw(
374 human
375 Alabaman Bahaman Burman German
376 Hiroshiman Liman Nakayaman Oklahoman
377 Panaman Selman Sonaman Tacoman Yakiman
378 Yokohaman Yuman
379);
380
381my @PL_sb_uninflected_s =
382(
383# PAIRS OR GROUPS SUBSUMED TO A SINGULAR...
384 "breeches", "britches", "clippers", "gallows", "hijinks",
385 "headquarters", "pliers", "scissors", "testes", "herpes",
386 "pincers", "shears", "proceedings", "trousers",
387
388# UNASSIMILATED LATIN 4th DECLENSION
389
390 "cantus", "coitus", "nexus",
391
392# RECENT IMPORTS...
393 "contretemps", "corps", "debris",
394 ".*ois", "siemens",
395
396# DISEASES
397 ".*measles", "mumps",
398
399# MISCELLANEOUS OTHERS...
400 "diabetes", "jackanapes", "series", "species", "rabies",
401 "chassis", "innings", "news", "mews",
402);
403
404my $PL_sb_uninflected_herd = enclose join "|",
405# DON'T INFLECT IN CLASSICAL MODE, OTHERWISE NORMAL INFLECTION
406(
407 "wildebeest", "swine", "eland", "bison", "buffalo",
408 "elk", "moose", "rhinoceros",
409);
410
411my $PL_sb_uninflected = enclose join "|",
412(
413# SOME FISH AND HERD ANIMALS
414 ".*fish", "tuna", "salmon", "mackerel", "trout",
415 "bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
416
417 ".*deer", ".*sheep",
418
419# ALL NATIONALS ENDING IN -ese
420 "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
421 "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
422 "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
423 "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
424 "Shavese", "Vermontese", "Wenchowese", "Yengeese",
425 ".*[nrlm]ese",
426
427# SOME WORDS ENDING IN ...s (OFTEN PAIRS TAKEN AS A WHOLE)
428
429 @PL_sb_uninflected_s,
430
431# DISEASES
432 ".*pox",
433
434
435# OTHER ODDITIES
436 "graffiti", "djinn"
437);
438
439# SINGULAR WORDS ENDING IN ...s (ALL INFLECT WITH ...es)
440
441my $PL_sb_singular_s = enclose join '|',
442(
443 ".*ss",
444 "acropolis", "aegis", "alias", "asbestos", "bathos", "bias",
445 "bronchitis", "bursitis", "caddis", "cannabis",
446 "canvas", "chaos", "cosmos", "dais", "digitalis",
447 "epidermis", "ethos", "eyas", "gas", "glottis",
448 "hubris", "ibis", "lens", "mantis", "marquis", "metropolis",
449 "pathos", "pelvis", "polis", "rhinoceros",
450 "sassafras", "trellis", ".*us", "[A-Z].*es",
451
452 @PL_sb_C_is_ides,
453);
454
455my $PL_v_special_s = enclose join '|',
456(
457 $PL_sb_singular_s,
458 @PL_sb_uninflected_s,
459 keys %PL_sb_irregular_s,
460 '(.*[csx])is',
461 '(.*)ceps',
462 '[A-Z].*s',
463);
464
465my %PL_sb_postfix_adj = (
466 'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
467 'martial' => [qw(court)],
468);
469
470foreach (keys %PL_sb_postfix_adj) {
471 $PL_sb_postfix_adj{$_} = enclose
472 enclose(join('|', @{$PL_sb_postfix_adj{$_}}))
473 . "(?=(?:-|\\s+)$_)";
474}
475
476my $PL_sb_postfix_adj = '(' . join('|', values %PL_sb_postfix_adj) . ')(.*)';
477
478my $PL_sb_military = 'major|lieutenant|brigadier|adjutant|quartermaster';
479my $PL_sb_general = '((?!'.$PL_sb_military.').*?)((-|\s+)general)';
480
481my $PL_prep = enclose join '|', qw (
482 about above across after among around at athwart before behind
483 below beneath beside besides between betwixt beyond but by
484 during except for from in into near of off on onto out over
485 since till to under until unto upon with
486);
487
488my $PL_sb_prep_dual_compound = '(.*?)((?:-|\s+)(?:'.$PL_prep.'|d[eu])(?:-|\s+))a(?:-|\s+)(.*)';
489
490my $PL_sb_prep_compound = '(.*?)((-|\s+)('.$PL_prep.'|d[eu])((-|\s+)(.*))?)';
491
492
493my %PL_pron_nom =
494(
495# NOMINATIVE REFLEXIVE
496
497"i" => "we", "myself" => "ourselves",
498"you" => "you", "yourself" => "yourselves",
499"she" => "they", "herself" => "themselves",
500"he" => "they", "himself" => "themselves",
501"it" => "they", "itself" => "themselves",
502"they" => "they", "themself" => "themselves",
503
504# POSSESSIVE
505
506"mine" => "ours",
507"yours" => "yours",
508"hers" => "theirs",
509"his" => "theirs",
510"its" => "theirs",
511"theirs" => "theirs",
512);
513
514my %PL_pron_acc =
515(
516# ACCUSATIVE REFLEXIVE
517
518"me" => "us", "myself" => "ourselves",
519"you" => "you", "yourself" => "yourselves",
520"her" => "them", "herself" => "themselves",
521"him" => "them", "himself" => "themselves",
522"it" => "them", "itself" => "themselves",
523"them" => "them", "themself" => "themselves",
524);
525
526my $PL_pron_acc = enclose join '|', keys %PL_pron_acc;
527
528my %PL_v_irregular_pres =
529(
530# 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR
531# 3RD PERS. (INDET.)
532
533"am" => "are", "are" => "are", "is" => "are",
534"was" => "were", "were" => "were", "was" => "were",
535"have" => "have", "have" => "have", "has" => "have",
536"do" => "do", "do" => "do", "does" => "do",
537);
538
539my $PL_v_irregular_pres = enclose join '|', keys %PL_v_irregular_pres;
540
541my %PL_v_ambiguous_pres =
542(
543# 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR
544# 3RD PERS. (INDET.)
545
546"act" => "act", "act" => "act", "acts" => "act",
547"blame" => "blame", "blame" => "blame", "blames" => "blame",
548"can" => "can", "can" => "can", "can" => "can",
549"must" => "must", "must" => "must", "must" => "must",
550"fly" => "fly", "fly" => "fly", "flies" => "fly",
551"copy" => "copy", "copy" => "copy", "copies" => "copy",
552"drink" => "drink", "drink" => "drink", "drinks" => "drink",
553"fight" => "fight", "fight" => "fight", "fights" => "fight",
554"fire" => "fire", "fire" => "fire", "fires" => "fire",
555"like" => "like", "like" => "like", "likes" => "like",
556"look" => "look", "look" => "look", "looks" => "look",
557"make" => "make", "make" => "make", "makes" => "make",
558"reach" => "reach", "reach" => "reach", "reaches" => "reach",
559"run" => "run", "run" => "run", "runs" => "run",
560"sink" => "sink", "sink" => "sink", "sinks" => "sink",
561"sleep" => "sleep", "sleep" => "sleep", "sleeps" => "sleep",
562"view" => "view", "view" => "view", "views" => "view",
563);
564
565my $PL_v_ambiguous_pres = enclose join '|', keys %PL_v_ambiguous_pres;
566
567
568my $PL_v_irregular_non_pres = enclose join '|',
569(
570"did", "had", "ate", "made", "put",
571"spent", "fought", "sank", "gave", "sought",
572"shall", "could", "ought", "should",
573);
574
575my $PL_v_ambiguous_non_pres = enclose join '|',
576(
577"thought", "saw", "bent", "will", "might", "cut",
578);
579
580# "..oes" -> "..oe" (the rest are "..oes" -> "o")
581
582my $PL_v_oes_oe = enclose join "|",
583qw(
584 .*shoes .*hoes .*toes
585 canoes floes oboes roes throes woes
586);
587
588my $PL_count_zero = enclose join '|',
589(
5900, "no", "zero", "nil"
591);
592
593my $PL_count_one = enclose join '|',
594(
5951, "a", "an", "one", "each", "every", "this", "that",
596);
597
598my %PL_adj_special =
599(
600"a" => "some", "an" => "some",
601"this" => "these", "that" => "those",
602);
603my $PL_adj_special = enclose join '|', keys %PL_adj_special;
604
605my %PL_adj_poss =
606(
607"my" => "our",
608"your" => "your",
609"its" => "their",
610"her" => "their",
611"his" => "their",
612"their" => "their",
613);
614my $PL_adj_poss = enclose join '|', keys %PL_adj_poss;
615
616
617sub checkpat
618{
619local $SIG{__WARN__} = sub {0};
620do {$@ =~ s/at.*?$//;
621 die "\nBad user-defined singular pattern:\n\t$@\n"}
622 if (!eval "'' =~ m/$_[0]/; 1;" or $@);
623return @_;
624}
625
626sub checkpatsubs
627{
628checkpat($_[0]);
629if (defined $_[1])
630{
631 local $SIG{__WARN__} = sub {0};
632 do {$@ =~ s/at.*?$//;
633 die "\nBad user-defined plural string: '$_[1]'\n\t$@\n"}
634 if (!eval "qq{$_[1]}; 1;" or $@);
635}
636return @_;
637}
638
639my @PL_sb_user_defined = ();
640my @PL_v_user_defined = ();
641my @PL_adj_user_defined = ();
642my @A_a_user_defined = ();
643
644sub def_noun
645{
646 unshift @PL_sb_user_defined, checkpatsubs(@_);
647 return 1;
648}
649
650sub def_verb
651{
652 unshift @PL_v_user_defined, checkpatsubs(@_[4,5]);
653 unshift @PL_v_user_defined, checkpatsubs(@_[2,3]);
654 unshift @PL_v_user_defined, checkpatsubs(@_[0,1]);
655 return 1;
656}
657
658sub def_adj
659{
660 unshift @PL_adj_user_defined, checkpatsubs(@_);
661 return 1;
662}
663
664sub def_a
665{
666unshift @A_a_user_defined, checkpat(@_,'a');
667return 1;
668}
669
670sub def_an
671{
672unshift @A_a_user_defined, checkpat(@_,'an');
673return 1;
674}
675
676sub ud_match
677{
678my $word = shift;
679for (my $i=0; $i < @_; $i+=2)
680{
681 if ($word =~ /^(?:$_[$i])$/i)
682 {
683 last unless defined $_[$i+1];
684 return eval '"'.$_[$i+1].'"';
685 }
686}
687return undef;
688}
689
690do
691{
692local $SIG{__WARN__} = sub {0};
693my $rcfile;
694
695$rcfile = $INC{'Lingua//EN/Inflect.pm'} || '';
696$rcfile =~ s/Inflect.pm$/.inflectrc/;
697do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n"
698if $rcfile && -r $rcfile && -s $rcfile;
699
700$rcfile = "$ENV{HOME}/.inflectrc" || '';
701do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n"
702if $rcfile && -r $rcfile && -s $rcfile;
703};
704
705sub postprocess # FIX PEDANTRY AND CAPITALIZATION :-)
706{
707my ($orig, $inflected) = @_;
708$inflected =~ s/([^|]+)\|(.+)/ $classical{all}?$2:$1 /e;
709return $orig =~ /^I$/ ? $inflected
710 : $orig =~ /^[A-Z]+$/ ? uc $inflected
711 : $orig =~ /^[A-Z]/ ? ucfirst $inflected
712 : $inflected;
713}
714
715sub PL
716# PL($word,$number)
717{
718my ($str, $count) = @_;
719my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
720return $str unless $word;
721my $plural = postprocess $word, _PL_special_adjective($word,$count)
722 || _PL_special_verb($word,$count)
723 || _PL_noun($word,$count);
724return $pre.$plural.$post;
725}
726
727sub PL_N
728# PL_N($word,$number)
729{
730my ($str, $count) = @_;
731my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
732return $str unless $word;
733my $plural = postprocess $word, _PL_noun($word,$count);
734return $pre.$plural.$post;
735}
736
737sub PL_V
738# PL_V($word,$number)
739{
740my ($str, $count) = @_;
741my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
742return $str unless $word;
743my $plural = postprocess $word, _PL_special_verb($word,$count)
744 || _PL_general_verb($word,$count);
745return $pre.$plural.$post;
746}
747
748sub PL_ADJ
749# PL_ADJ($word,$number)
750{
751my ($str, $count) = @_;
752my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
753return $str unless $word;
754my $plural = postprocess $word, _PL_special_adjective($word,$count)
755 || $word;
756return $pre.$plural.$post;
757}
758
759sub PL_eq { _PL_eq(@_, \&PL); }
760sub PL_N_eq { _PL_eq(@_, \&PL_N); }
761sub PL_V_eq { _PL_eq(@_, \&PL_V); }
762sub PL_ADJ_eq { _PL_eq(@_, \&PL_ADJ); }
763
764sub _PL_eq
765{
766my ( $word1, $word2, $PL ) = @_;
767my %classval = %classical;
768%classical = %all_classical;
769my $result = "";
770$result = "eq" if !$result && $word1 eq $word2;
771$result = "p:s" if !$result && $word1 eq &$PL($word2);
772$result = "s:p" if !$result && &$PL($word1) eq $word2;
773%classical = ();
774$result = "p:s" if !$result && $word1 eq &$PL($word2);
775$result = "s:p" if !$result && &$PL($word1) eq $word2;
776%classical = %classval;
777
778if ($PL == \&PL || $PL == \&PL_N)
779{
780 $result = "p:p"
781 if !$result && _PL_check_plurals_N($word1,$word2);
782 $result = "p:p"
783 if !$result && _PL_check_plurals_N($word2,$word1);
784}
785if ($PL == \&PL || $PL == \&PL_ADJ)
786{
787 $result = "p:p"
788 if !$result && _PL_check_plurals_ADJ($word1,$word2,$PL);
789}
790
791return $result;
792}
793
794sub _PL_reg_plurals
795{
796 $_[0] =~ /($_[1])($_[2]\|\1$_[3]|$_[3]\|\1$_[2])/
797}
798
799sub _PL_check_plurals_N
800{
801my $pair = "$_[0]|$_[1]";
802foreach ( values %PL_sb_irregular_s ) { return 1 if $_ eq $pair; }
803foreach ( values %PL_sb_irregular ) { return 1 if $_ eq $pair; }
804
805return 1 if _PL_reg_plurals($pair, $PL_sb_C_a_ata, "as","ata")
806 || _PL_reg_plurals($pair, $PL_sb_C_is_ides, "is","ides")
807 || _PL_reg_plurals($pair, $PL_sb_C_a_ae, "s","e")
808 || _PL_reg_plurals($pair, $PL_sb_C_en_ina, "ens","ina")
809 || _PL_reg_plurals($pair, $PL_sb_C_um_a, "ums","a")
810 || _PL_reg_plurals($pair, $PL_sb_C_us_i, "uses","i")
811 || _PL_reg_plurals($pair, $PL_sb_C_on_a, "ons","a")
812 || _PL_reg_plurals($pair, $PL_sb_C_o_i, "os","i")
813 || _PL_reg_plurals($pair, $PL_sb_C_ex_ices, "exes","ices")
814 || _PL_reg_plurals($pair, $PL_sb_C_ix_ices, "ixes","ices")
815 || _PL_reg_plurals($pair, $PL_sb_C_i, "s","i")
816 || _PL_reg_plurals($pair, $PL_sb_C_im, "s","im")
817
818 || _PL_reg_plurals($pair, '.*eau', "s","x")
819 || _PL_reg_plurals($pair, '.*ieu', "s","x")
820 || _PL_reg_plurals($pair, '.*tri', "xes","ces")
821 || _PL_reg_plurals($pair, '.{2,}[yia]n', "xes","ges");
822
823
824return 0;
825}
826
827sub _PL_check_plurals_ADJ
828{
829my ( $word1a, $word2a ) = @_;
830my ( $word1b, $word2b ) = @_;
831
832$word1a = '' unless $word1a =~ s/'s?$//;
833$word2a = '' unless $word2a =~ s/'s?$//;
834$word1b = '' unless $word1b =~ s/s'$//;
835$word2b = '' unless $word2b =~ s/s'$//;
836
837if ($word1a)
838{
839 return 1 if $word2a && ( _PL_check_plurals_N($word1a, $word2a)
840 || _PL_check_plurals_N($word2a, $word1a) );
841 return 1 if $word2b && ( _PL_check_plurals_N($word1a, $word2b)
842 || _PL_check_plurals_N($word2b, $word1a) );
843}
844if ($word1b)
845{
846 return 1 if $word2a && ( _PL_check_plurals_N($word1b, $word2a)
847 || _PL_check_plurals_N($word2a, $word1b) );
848 return 1 if $word2b && ( _PL_check_plurals_N($word1b, $word2b)
849 || _PL_check_plurals_N($word2b, $word1b) );
850}
851
852
853return "";
854}
855
856sub _PL_noun
857{
858my ( $word, $count ) = @_;
859my $value; # UTILITY VARIABLE
860
861# DEFAULT TO PLURAL
862
863$count = $persistent_count
864 if !defined($count) && defined($persistent_count);
865
866$count = (defined $count and $count=~/^($PL_count_one)$/io
867 or defined $count and $classical{zero}
868 and $count=~/^($PL_count_zero)$/io)
869 ? 1
870 : 2;
871
872return $word if $count==1;
873
874# HANDLE USER-DEFINED NOUNS
875
876return $value if defined($value = ud_match($word, @PL_sb_user_defined));
877
878
879# HANDLE EMPTY WORD, SINGULAR COUNT AND UNINFLECTED PLURALS
880
881$word eq '' and return $word;
882
883$word =~ /^($PL_sb_uninflected)$/i
884 and return $word;
885
886$classical{herd} and $word =~ /^($PL_sb_uninflected_herd)$/i
887 and return $word;
888
889
890# HANDLE COMPOUNDS ("Governor General", "mother-in-law", "aide-de-camp", ETC.)
891
892$word =~ /^(?:$PL_sb_postfix_adj)$/i
893 and $value = $2
894 and return _PL_noun($1,2)
895 . $value;
896
897$word =~ /^(?:$PL_sb_prep_dual_compound)$/i
898 and $value = [$2,$3]
899 and return _PL_noun($1,2)
900 . $value->[0]
901 . _PL_noun($value->[1]);
902
903$word =~ /^(?:$PL_sb_prep_compound)$/i
904 and $value = $2
905 and return _PL_noun($1,2)
906 . $value;
907
908# HANDLE PRONOUNS
909
910$word =~ /^((?:$PL_prep)\s+)($PL_pron_acc)$/i
911 and return $1.$PL_pron_acc{lc($2)};
912
913$value = $PL_pron_nom{lc($word)}
914 and return $value;
915
916$word =~ /^($PL_pron_acc)$/i
917 and return $PL_pron_acc{lc($1)};
918
919# HANDLE ISOLATED IRREGULAR PLURALS
920
921$word =~ /(.*)\b($PL_sb_irregular)$/i
922 and return $1 . $PL_sb_irregular{lc $2};
923$word =~ /($PL_sb_U_man_mans)$/i
924 and return "$1s";
925$word =~ /(\S*)(person)$/i and return $classical{persons}?"$1persons":"$1people";
926
927# HANDLE FAMILIES OF IRREGULAR PLURALS
928
929$word =~ /(.*)man$/i and return "$1men";
930$word =~ /(.*[ml])ouse$/i and return "$1ice";
931$word =~ /(.*)goose$/i and return "$1geese";
932$word =~ /(.*)tooth$/i and return "$1teeth";
933$word =~ /(.*)foot$/i and return "$1feet";
934
935# HANDLE UNASSIMILATED IMPORTS
936
937$word =~ /(.*)ceps$/i and return $word;
938$word =~ /(.*)zoon$/i and return "$1zoa";
939$word =~ /(.*[csx])is$/i and return "$1es";
940$word =~ /($PL_sb_U_ex_ices)ex$/i and return "$1ices";
941$word =~ /($PL_sb_U_ix_ices)ix$/i and return "$1ices";
942$word =~ /($PL_sb_U_um_a)um$/i and return "$1a";
943$word =~ /($PL_sb_U_us_i)us$/i and return "$1i";
944$word =~ /($PL_sb_U_on_a)on$/i and return "$1a";
945$word =~ /($PL_sb_U_a_ae)$/i and return "$1e";
946
947# HANDLE INCOMPLETELY ASSIMILATED IMPORTS
948
949if ($classical{ancient})
950{
951 $word =~ /(.*)trix$/i and return "$1trices";
952 $word =~ /(.*)eau$/i and return "$1eaux";
953 $word =~ /(.*)ieu$/i and return "$1ieux";
954 $word =~ /(.{2,}[yia])nx$/i and return "$1nges";
955 $word =~ /($PL_sb_C_en_ina)en$/i and return "$1ina";
956 $word =~ /($PL_sb_C_ex_ices)ex$/i and return "$1ices";
957 $word =~ /($PL_sb_C_ix_ices)ix$/i and return "$1ices";
958 $word =~ /($PL_sb_C_um_a)um$/i and return "$1a";
959 $word =~ /($PL_sb_C_us_i)us$/i and return "$1i";
960 $word =~ /($PL_sb_C_us_us)$/i and return "$1";
961 $word =~ /($PL_sb_C_a_ae)$/i and return "$1e";
962 $word =~ /($PL_sb_C_a_ata)a$/i and return "$1ata";
963 $word =~ /($PL_sb_C_is_ides)is$/i and return "$1ides";
964 $word =~ /($PL_sb_C_o_i)o$/i and return "$1i";
965 $word =~ /($PL_sb_C_on_a)on$/i and return "$1a";
966 $word =~ /$PL_sb_C_im$/i and return "${word}im";
967 $word =~ /$PL_sb_C_i$/i and return "${word}i";
968}
969
970
971# HANDLE SINGULAR NOUNS ENDING IN ...s OR OTHER SILIBANTS
972
973$word =~ /^($PL_sb_singular_s)$/i and return "$1es";
974$word =~ /^([A-Z].*s)$/ and $classical{names} and return "$1es";
975$word =~ /(.*)([cs]h|[zx])$/i and return "$1$2es";
976# $word =~ /(.*)(us)$/i and return "$1$2es";
977
978# HANDLE ...f -> ...ves
979
980$word =~ /(.*[eao])lf$/i and return "$1lves";
981$word =~ /(.*[^d])eaf$/i and return "$1eaves";
982$word =~ /(.*[nlw])ife$/i and return "$1ives";
983$word =~ /(.*)arf$/i and return "$1arves";
984
985# HANDLE ...y
986
987$word =~ /(.*[aeiou])y$/i and return "$1ys";
988$word =~ /([A-Z].*y)$/ and $classical{names} and return "$1s";
989$word =~ /(.*)y$/i and return "$1ies";
990
991# HANDLE ...o
992
993$word =~ /$PL_sb_U_o_os$/i and return "${word}s";
994$word =~ /[aeiou]o$/i and return "${word}s";
995$word =~ /o$/i and return "${word}es";
996
997
998# OTHERWISE JUST ADD ...s
999
1000return "${word}s";
1001}
1002
1003
1004sub _PL_special_verb
1005{
1006my ( $word, $count ) = @_;
1007$count = $persistent_count
1008 if !defined($count) && defined($persistent_count);
1009$count = (defined $count and $count=~/^($PL_count_one)$/io or
1010 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1011 : 2;
1012
1013return undef if $count=~/^($PL_count_one)$/io;
1014
1015my $value; # UTILITY VARIABLE
1016
1017# HANDLE USER-DEFINED VERBS
1018
1019return $value if defined($value = ud_match($word, @PL_v_user_defined));
1020
1021# HANDLE IRREGULAR PRESENT TENSE (SIMPLE AND COMPOUND)
1022
1023$word =~ /^($PL_v_irregular_pres)((\s.*)?)$/i
1024 and return $PL_v_irregular_pres{lc $1}.$2;
1025
1026# HANDLE IRREGULAR FUTURE, PRETERITE AND PERFECT TENSES
1027
1028$word =~ /^($PL_v_irregular_non_pres)((\s.*)?)$/i
1029 and return $word;
1030
1031# HANDLE PRESENT NEGATIONS (SIMPLE AND COMPOUND)
1032
1033$word =~ /^($PL_v_irregular_pres)(n't(\s.*)?)$/i
1034 and return $PL_v_irregular_pres{lc $1}.$2;
1035
1036$word =~ /^\S+n't\b/i
1037 and return $word;
1038
1039# HANDLE SPECIAL CASES
1040
1041$word =~ /^($PL_v_special_s)$/ and return undef;
1042$word =~ /\s/ and return undef;
1043
1044# HANDLE STANDARD 3RD PERSON (CHOP THE ...(e)s OFF SINGLE WORDS)
1045
1046$word =~ /^(.*)([cs]h|[x]|zz|ss)es$/i and return "$1$2";
1047
1048$word =~ /^(..+)ies$/i and return "$1y";
1049
1050$word =~ /($PL_v_oes_oe)$/ and return substr($1,0,-1);
1051$word =~ /^(.+)oes$/i and return "$1o";
1052
1053$word =~ /^(.*[^s])s$/i and return $1;
1054
1055# OTHERWISE, A REGULAR VERB (HANDLE ELSEWHERE)
1056
1057return undef;
1058}
1059
1060sub _PL_general_verb
1061{
1062my ( $word, $count ) = @_;
1063$count = $persistent_count
1064 if !defined($count) && defined($persistent_count);
1065$count = (defined $count and $count=~/^($PL_count_one)$/io or
1066 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1067 : 2;
1068
1069return $word if $count=~/^($PL_count_one)$/io;
1070
1071# HANDLE AMBIGUOUS PRESENT TENSES (SIMPLE AND COMPOUND)
1072
1073$word =~ /^($PL_v_ambiguous_pres)((\s.*)?)$/i
1074 and return $PL_v_ambiguous_pres{lc $1}.$2;
1075
1076# HANDLE AMBIGUOUS PRETERITE AND PERFECT TENSES
1077
1078$word =~ /^($PL_v_ambiguous_non_pres)((\s.*)?)$/i
1079 and return $word;
1080
1081# OTHERWISE, 1st OR 2ND PERSON IS UNINFLECTED
1082
1083return $word;
1084
1085}
1086
1087sub _PL_special_adjective
1088{
1089my ( $word, $count ) = @_;
1090$count = $persistent_count
1091 if !defined($count) && defined($persistent_count);
1092$count = (defined $count and $count=~/^($PL_count_one)$/io or
1093 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1094 : 2;
1095
1096return $word if $count=~/^($PL_count_one)$/io;
1097
1098
1099# HANDLE USER-DEFINED ADJECTIVES
1100
1101my $value;
1102return $value if defined($value = ud_match($word, @PL_adj_user_defined));
1103
1104# HANDLE KNOWN CASES
1105
1106$word =~ /^($PL_adj_special)$/i
1107 and return $PL_adj_special{lc $1};
1108
1109# HANDLE POSSESSIVES
1110
1111$word =~ /^($PL_adj_poss)$/i
1112 and return $PL_adj_poss{lc $1};
1113
1114$word =~ /^(.*)'s?$/ and do { my $pl = PL_N($1);
1115 return "$pl'" . ($pl =~ m/s$/ ? "" : "s");
1116 };
1117
1118# OTHERWISE, NO IDEA
1119
1120return undef;
1121
1122}
1123
1124
1125# 2. INDEFINITE ARTICLES
1126
1127# THIS PATTERN MATCHES STRINGS OF CAPITALS STARTING WITH A "VOWEL-SOUND"
1128# CONSONANT FOLLOWED BY ANOTHER CONSONANT, AND WHICH ARE NOT LIKELY
1129# TO BE REAL WORDS (OH, ALL RIGHT THEN, IT'S JUST MAGIC!)
1130
1131my $A_abbrev = q{
1132(?! FJO | [HLMNS]Y. | RY[EO] | SQU
1133 | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
1134[FHLMNRSX][A-Z]
1135};
1136
1137# THIS PATTERN CODES THE BEGINNINGS OF ALL ENGLISH WORDS BEGINING WITH A
1138# 'y' FOLLOWED BY A CONSONANT. ANY OTHER Y-CONSONANT PREFIX THEREFORE
1139# IMPLIES AN ABBREVIATION.
1140
1141my $A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)';
1142
1143# EXCEPTIONS TO EXCEPTIONS
1144
1145my $A_explicit_an = enclose join '|',
1146(
1147"euler",
1148"hour(?!i)", "heir", "honest", "hono",
1149);
1150
1151sub A
1152{
1153my ($str, $count) = @_;
1154my ($pre, $word, $post) = ( $str =~ m/\A(\s*)(?:an?\s+)?(.+?)(\s*)\Z/i );
1155return $str unless $word;
1156my $result = _indef_article($word,$count);
1157return $pre.$result.$post;
1158}
1159
1160sub AN { goto &A }
1161
1162sub _indef_article
1163{
1164my ( $word, $count ) = @_;
1165
1166$count = $persistent_count
1167 if !defined($count) && defined($persistent_count);
1168
1169return "$count $word"
1170 if defined $count && $count!~/^($PL_count_one)$/io;
1171
1172# HANDLE USER-DEFINED VARIANTS
1173
1174my $value;
1175return $value if defined($value = ud_match($word, @A_a_user_defined));
1176
1177# HANDLE SPECIAL CASES
1178
1179$word =~ /^($A_explicit_an)/i and return "an $word";
1180
1181# HANDLE ABBREVIATIONS
1182
1183$word =~ /^($A_abbrev)/ox and return "an $word";
1184$word =~ /^[aefhilmnorsx][.-]/i and return "an $word";
1185$word =~ /^[a-z][.-]/i and return "a $word";
1186
1187# HANDLE CONSONANTS
1188
1189$word =~ /^[^aeiouy]/i and return "a $word";
1190
1191# HANDLE SPECIAL VOWEL-FORMS
1192
1193$word =~ /^e[uw]/i and return "a $word";
1194$word =~ /^onc?e\b/i and return "a $word";
1195$word =~ /^uni([^nmd]|mo)/i and return "a $word";
1196$word =~ /^u[bcfhjkqrst][aeiou]/i and return "a $word";
1197
1198# HANDLE SPECIAL CAPITALS
1199
1200$word =~ /^U[NK][AIEO]?/ and return "a $word";
1201
1202# HANDLE VOWELS
1203
1204$word =~ /^[aeiou]/i and return "an $word";
1205
1206# HANDLE y... (BEFORE CERTAIN CONSONANTS IMPLIES (UNNATURALIZED) "i.." SOUND)
1207
1208$word =~ /^($A_y_cons)/io and return "an $word";
1209
1210# OTHERWISE, GUESS "a"
1211 return "a $word";
1212}
1213
1214# 2. TRANSLATE ZERO-QUANTIFIED $word TO "no PL($word)"
1215
1216sub NO
1217{
1218my ($str, $count) = @_;
1219my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
1220
1221$count = $persistent_count
1222 if !defined($count) && defined($persistent_count);
1223$count = 0 unless $count;
1224
1225return "$pre$count " . PL($word,$count) . $post
1226 unless $count =~ /^$PL_count_zero$/;
1227return "${pre}no ". PL($word,0) . $post ;
1228}
1229
1230
1231# PARTICIPLES
1232
1233sub PART_PRES
1234{
1235 local $_ = PL_V(shift,2);
1236 s/ie$/y/
1237 or s/ue$/u/
1238 or s/([auy])e$/$1/
1239 or s/ski$/ski/
1240 or s/i$//
1241 or s/([^e])e$/$1/
1242 or m/er$/
1243 or s/([^aeiou][aeiouy]([bdgmnprst]))$/$1$2/;
1244 return "${_}ing";
1245}
1246
1247
1248
1249# NUMERICAL INFLECTIONS
1250
1251my %nth =
1252(
1253 0 => 'th',
1254 1 => 'st',
1255 2 => 'nd',
1256 3 => 'rd',
1257 4 => 'th',
1258 5 => 'th',
1259 6 => 'th',
1260 7 => 'th',
1261 8 => 'th',
1262 9 => 'th',
1263 11 => 'th',
1264 12 => 'th',
1265 13 => 'th',
1266);
1267
1268
1269my %ordinal;
1270@ordinal{qw(ty one two three five eight nine twelve )}=
1271 qw(tieth first second third fifth eighth ninth twelfth);
1272
1273my $ordinal_suff = join '|', keys %ordinal, "";
1274
1275$ordinal{""} = 'th';
1276
1277sub ORD($)
1278{
1279 my $num = shift;
1280 if ($num =~ /\d/) {
1281 return $num . ($nth{$num%100} || $nth{$num%10});
1282 }
1283 else {
1284 $num =~ s/($ordinal_suff)\Z/$ordinal{$1}/;
1285 return $num;
1286 }
1287}
1288
1289
1290my %default_args =
1291(
1292 'group' => 0,
1293 'comma' => ',',
1294 'and' => 'and',
1295 'zero' => 'zero',
1296 'one' => 'one',
1297 'decimal' => 'point',
1298);
1299
1300my @unit = ('',qw(one two three four five six seven eight nine));
1301my @teen = qw(ten eleven twelve thirteen fourteen
1302 fifteen sixteen seventeen eighteen nineteen);
1303my @ten = ('','',qw(twenty thirty forty fifty sixty seventy eighty ninety));
1304my @mill = map { (my $val=$_) =~ s/_/illion/; " $val" }
1305 ('',qw(thousand m_ b_ tr_ quadr_ quint_ sext_ sept_ oct_ non_ dec_));
1306
1307
1308sub mill { my $ind = $_[0]||0;
1309 die "Number out of range\n" if $ind > $#mill;
1310 return $ind<@mill ? $mill[$ind] : ' ???illion'; }
1311
1312sub unit { return $unit[$_[0]]. mill($_[1]); }
1313
1314sub ten
1315{
1316 return $ten[$_[0]] . ($_[0]&&$_[1]?'-':'') . $unit[$_[1]] . mill($_[2])
1317 if $_[0] ne '1';
1318 return $teen[$_[1]]. $mill[$_[2]||0];
1319}
1320
1321sub hund
1322{
1323 return unit($_[0]) . " hundred" . ($_[1] || $_[2] ? " $_[4] " : '')
1324 . ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[0];
1325 return ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[1] || $_[2];
1326 return '';
1327}
1328
1329
1330sub enword
1331{
1332 my ($num,$group,$zero,$one,$comma,$and) = @_;
1333
1334 if ($group==1)
1335 {
1336 $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /eg;
1337 }
1338 elsif ($group==2)
1339 {
1340 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /eg;
1341 $num =~ s/(\d)/ ($1 ? unit($1) :" $zero")."$comma " /e;
1342 }
1343 elsif ($group==3)
1344 {
1345 $num =~ s/(\d)(\d)(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")." ".($2 ? ten($2,$3) : $3 ? " $zero " . unit($3) : " $zero $zero") . "$comma " /eg;
1346 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /e;
1347 $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /e;
1348 }
1349 elsif ($num+0==0) {
1350 $num = $zero;
1351 }
1352 elsif ($num+0==1) {
1353 $num = $one;
1354 }
1355 else {
1356 $num =~ s/\A\s*0+//;
1357 my $mill = 0;
1358 1 while $num =~ s/(\d)(\d)(\d)(?=\D*\Z)/ hund($1,$2,$3,$mill++,$and) /e;
1359 $num =~ s/(\d)(\d)(?=\D*\Z)/ ten($1,$2,$mill)."$comma " /e;
1360 $num =~ s/(\d)(?=\D*\Z)/ unit($1,$mill) . "$comma "/e;
1361 }
1362 return $num;
1363}
1364
1365sub NUMWORDS
1366{
1367 my $num = shift;
1368 my %arg = ( %default_args, @_ );
1369 my $group = $arg{group};
1370
1371 die "Bad chunking option: $group\n" unless $group =~ /\A[0-3]\Z/;
1372 my $sign = ($num =~ /\A\s*\+/) ? "plus"
1373 : ($num =~ /\A\s*\-/) ? "minus"
1374 : '';
1375
1376 my ($zero, $one) = @arg{'zero','one'};
1377 my $comma = $arg{comma};
1378 my $and = $arg{'and'};
1379
1380 my $ord = $num =~ s/(st|nd|rd|th)\Z//;
1381 my @chunks = ($arg{decimal})
1382 ? $group ? split(/\./, $num) : split(/\./, $num, 2)
1383 : ($num);
1384
1385 my $first = 1;
1386
1387 if ($chunks[0] eq '') { $first=0; shift @chunks; }
1388
1389 foreach ( @chunks )
1390 {
1391 s/\D//g;
1392 $_ = '0' unless $_;
1393
1394 if (!$group && !$first) { $_ = enword($_,1,$zero,$one,$comma,$and) }
1395 else { $_ = enword($_,$group,$zero,$one,$comma,$and) }
1396
1397 s/, \Z//;
1398 s/\s+,/,/g;
1399 s/, (\S+)\s+\Z/ $and $1/ if !$group and $first;
1400 s/\s+/ /g;
1401 s/(\A\s|\s\Z)//g;
1402 $first = '' if $first;
1403 }
1404
1405 my @numchunks = ();
1406 if ($first =~ /0/)
1407 {
1408 unshift @chunks, '';
1409 }
1410 else
1411 {
1412 @numchunks = split /\Q$comma /, $chunks[0];
1413 }
1414
1415 $numchunks[-1] =~ s/($ordinal_suff)\Z/$ordinal{$1}/
1416 if $ord and @numchunks;
1417
1418 foreach (@chunks[1..$#chunks])
1419 {
1420 push @numchunks, $arg{decimal};
1421 push @numchunks, split /\Q$comma /;
1422 }
1423
1424 if (wantarray)
1425 {
1426 unshift @numchunks, $sign if $sign;
1427 return @numchunks
1428 }
1429 elsif ($group)
1430 {
1431 return ($sign?"$sign ":'') . join ", ", @numchunks;
1432 }
1433 else
1434 {
1435 $num = ($sign?"$sign ":'') . shift @numchunks;
1436 $first = ($num !~ /$arg{decimal}\Z/);
1437 foreach ( @numchunks )
1438 {
1439 if (/\A$arg{decimal}\Z/)
1440 {
1441 $num .= " $_";
1442 $first = 0;
1443 }
1444 elsif ($first)
1445 {
1446 $num .= "$comma $_";
1447 }
1448 else
1449 {
1450 $num .= " $_";
1451 }
1452 }
1453 return $num;
1454 }
1455}
1456
14571;
1458
1459__END__
1460
1461=head1 NAME
1462
1463Lingua::EN::Inflect - Convert singular to plural. Select "a" or "an".
1464
1465=head1 VERSION
1466
1467This document describes version 1.86 of Lingua::EN::Inflect,
1468released October 20, 2000.
1469
1470=head1 SYNOPSIS
1471
1472 use Lingua::EN::Inflect qw ( PL PL_N PL_V PL_ADJ NO NUM
1473 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
1474 A AN
1475 PART_PRES
1476 ORD NUMWORDS
1477 inflect classical
1478 def_noun def_verb def_adj def_a def_an );
1479
1480
1481 # UNCONDITIONALLY FORM THE PLURAL
1482
1483 print "The plural of ", $word, " is ", PL($word), "\n";
1484
1485
1486 # CONDITIONALLY FORM THE PLURAL
1487
1488 print "I saw $cat_count ", PL("cat",$cat_count), "\n";
1489
1490
1491 # FORM PLURALS FOR SPECIFIC PARTS OF SPEECH
1492
1493 print PL_N("I",$N1), PL_V("saw",$N1),
1494 PL_ADJ("my",$N2), PL_N("saw",$N2), "\n";
1495
1496
1497 # DEAL WITH "0/1/N" -> "no/1/N" TRANSLATION:
1498
1499 print "There ", PL_V("was",$errors), NO(" error",$errors), "\n";
1500
1501
1502 # USE DEFAULT COUNTS:
1503
1504 print NUM($N1,""), PL("I"), PL_V(" saw"), NUM($N2), PL_N(" saw");
1505 print "There ", NUM($errors,''), PL_V("was"), NO(" error"), "\n";
1506
1507
1508 # COMPARE TWO WORDS "NUMBER-INSENSITIVELY":
1509
1510 print "same\n" if PL_eq($word1, $word2);
1511 print "same noun\n" if PL_eq_N($word1, $word2);
1512 print "same verb\n" if PL_eq_V($word1, $word2);
1513 print "same adj.\n" if PL_eq_ADJ($word1, $word2);
1514
1515
1516 # ADD CORRECT "a" OR "an" FOR A GIVEN WORD:
1517
1518 print "Did you want ", A($thing), " or ", AN($idea), "\n";
1519
1520
1521 # CONVERT NUMERALS INTO ORDINALS (i.e. 1->1st, 2->2nd, 3->3rd, etc.)
1522
1523 print "It was", ORD($position), " from the left\n";
1524
1525 # CONVERT NUMERALS TO WORDS (i.e. 1->"one", 101->"one hundred and one", etc.)
1526 # IN A SCALAR CONTEXT: GET BACK A SINGLE STRING...
1527
1528 $words = NUMWORDS(1234); # "one thousand, two hundred and thirty-four"
1529 $words = NUMWORDS(ORD(1234)); # "one thousand, two hundred and thirty-fourth"
1530
1531
1532 # IN A LIST CONTEXT: GET BACK A LIST OF STRINGSi, ONE FOR EACH "CHUNK"...
1533
1534 @words = NUMWORDS(1234); # ("one thousand","two hundred and thirty-four")
1535
1536
1537 # OPTIONAL PARAMETERS CHANGE TRANSLATION:
1538
1539 $words = NUMWORDS(12345, group=>1);
1540 # "one, two, three, four, five"
1541
1542 $words = NUMWORDS(12345, group=>2);
1543 # "twelve, thirty-four, five"
1544
1545 $words = NUMWORDS(12345, group=>3);
1546 # "one twenty-three, forty-five"
1547
1548 $words = NUMWORDS(1234, 'and'=>'');
1549 # "one thousand, two hundred thirty-four"
1550
1551 $words = NUMWORDS(1234, 'and'=>', plus');
1552 # "one thousand, two hundred, plus thirty-four"
1553
1554 $words = NUMWORDS(555_1202, group=>1, zero=>'oh');
1555 # "five, five, five, one, two, oh, two"
1556
1557 $words = NUMWORDS(555_1202, group=>1, one=>'unity');
1558 # "five, five, five, unity, two, oh, two"
1559
1560 $words = NUMWORDS(123.456, group=>1, decimal=>'mark');
1561 # "one two three mark four five six"
1562
1563
1564 # REQUIRE "CLASSICAL" PLURALS (EG: "focus"->"foci", "cherub"->"cherubim")
1565
1566 classical; # USE ALL CLASSICAL PLURALS
1567
1568 classical 1; # USE ALL CLASSICAL PLURALS
1569 classical 0; # USE ALL MODERN PLURALS (DEFAULT)
1570
1571 classical 'zero'; # "no error" INSTEAD OF "no errors"
1572 classical zero=>1; # "no error" INSTEAD OF "no errors"
1573 classical zero=>0; # "no errors" INSTEAD OF "no error"
1574
1575 classical 'herd'; # "2 buffalo" INSTEAD OF "2 buffalos"
1576 classical herd=>1; # "2 buffalo" INSTEAD OF "2 buffalos"
1577 classical herd=>0; # "2 buffalos" INSTEAD OF "2 buffalo"
1578
1579 classical 'persons'; # "2 chairpersons" INSTEAD OF "2 chairpeople"
1580 classical persons=>1; # "2 chairpersons" INSTEAD OF "2 chairpeople"
1581 classical persons=>0; # "2 chairpeople" INSTEAD OF "2 chairpersons"
1582
1583 classical 'ancient'; # "2 formulae" INSTEAD OF "2 formulas"
1584 classical ancient=>1; # "2 formulae" INSTEAD OF "2 formulas"
1585 classical ancient=>0; # "2 formulas" INSTEAD OF "2 formulae"
1586
1587
1588
1589 # INTERPOLATE "PL()", "PL_N()", "PL_V()", "PL_ADJ()", A()", "AN()"
1590 # "NUM()" AND "ORD()" WITHIN STRINGS:
1591
1592 print inflect("The plural of $word is PL($word)\n");
1593 print inflect("I saw $cat_count PL("cat",$cat_count)\n");
1594 print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)");
1595 print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)");
1596 print inflect("I saw NUM($cat_count) PL("cat")\nNUM()");
1597 print inflect("There PL_V(was,$errors) NO(error,$errors)\n");
1598 print inflect("There NUM($errors,) PL_V(was) NO(error)\n";
1599 print inflect("Did you want A($thing) or AN($idea)\n");
1600 print inflect("It was ORD($position) from the left\n");
1601
1602
1603 # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES):
1604
1605 def_noun "VAX" => "VAXen"; # SINGULAR => PLURAL
1606
1607 def_verb "will" => "shall", # 1ST PERSON SINGULAR => PLURAL
1608 "will" => "will", # 2ND PERSON SINGULAR => PLURAL
1609 "will" => "will", # 3RD PERSON SINGULAR => PLURAL
1610
1611 def_adj "hir" => "their", # SINGULAR => PLURAL
1612
1613 def_a "h" # "AY HALWAYS SEZ 'HAITCH'!"
1614
1615 def_an "horrendous.*" # "AN HORRENDOUS AFFECTATION"
1616
1617
1618=head1 DESCRIPTION
1619
1620The exportable subroutines of Lingua::EN::Inflect provide plural
1621inflections, "a"/"an" selection for English words, and manipulation
1622of numbers as words
1623
1624Plural forms of all nouns, most verbs, and some adjectives are
1625provided. Where appropriate, "classical" variants (for example: "brother" ->
1626"brethren", "dogma" -> "dogmata", etc.) are also provided.
1627
1628Pronunciation-based "a"/"an" selection is provided for all English
1629words, and most initialisms.
1630
1631It is also possible to inflect numerals (1,2,3) to ordinals (1st, 2nd, 3rd)
1632and to english words ("one", "two", "three).
1633
1634In generating these inflections, Lingua::EN::Inflect follows the Oxford
1635English Dictionary and the guidelines in Fowler's Modern English
1636Usage, preferring the former where the two disagree.
1637
1638The module is built around standard British spelling, but is designed
1639to cope with common American variants as well. Slang, jargon, and
1640other English dialects are I<not> explicitly catered for.
1641
1642Where two or more inflected forms exist for a single word (typically a
1643"classical" form and a "modern" form), Lingua::EN::Inflect prefers the
1644more common form (typically the "modern" one), unless "classical"
1645processing has been specified
1646(see L<"MODERN VS CLASSICAL INFLECTIONS">).
1647
1648=head1 FORMING PLURALS
1649
1650=head2 Inflecting Plurals
1651
1652All of the C<PL_...> plural inflection subroutines take the word to be
1653inflected as their first argument and return the corresponding inflection.
1654Note that all such subroutines expect the I<singular> form of the word. The
1655results of passing a plural form are undefined (and unlikely to be correct).
1656
1657The C<PL_...> subroutines also take an optional second argument,
1658which indicates the grammatical "number" of the word (or of another word
1659with which the word being inflected must agree). If the "number" argument is
1660supplied and is not C<1> (or C<"one"> or C<"a">, or some other adjective that
1661implies the singular), the plural form of the word is returned. If the
1662"number" argument I<does> indicate singularity, the (uninflected) word
1663itself is returned. If the number argument is omitted, the plural form
1664is returned unconditionally.
1665
1666The various subroutines are:
1667
1668=over 8
1669
1670=item C<PL_N($;$)>
1671
1672The exportable subroutine C<PL_N()> takes a I<singular> English noun or
1673pronoun and returns its plural. Pronouns in the nominative ("I" ->
1674"we") and accusative ("me" -> "us") cases are handled, as are
1675possessive pronouns ("mine" -> "ours").
1676
1677
1678=item C<PL_V($;$)>
1679
1680The exportable subroutine C<PL_V()> takes the I<singular> form of a
1681conjugated verb (that is, one which is already in the correct "person"
1682and "mood") and returns the corresponding plural conjugation.
1683
1684
1685=item C<PL_ADJ($;$)>
1686
1687The exportable subroutine C<PL_ADJ()> takes the I<singular> form of
1688certain types of adjectives and returns the corresponding plural form.
1689Adjectives that are correctly handled include: "numerical" adjectives
1690("a" -> "some"), demonstrative adjectives ("this" -> "these", "that" ->
1691"those"), and possessives ("my" -> "our", "cat's" -> "cats'", "child's"
1692-> "childrens'", etc.)
1693
1694
1695=item C<PL($;$)>
1696
1697The exportable subroutine C<PL()> takes a I<singular> English noun,
1698pronoun, verb, or adjective and returns its plural form. Where a word
1699has more than one inflection depending on its part of speech (for
1700example, the noun "thought" inflects to "thoughts", the verb "thought"
1701to "thought"), the (singular) noun sense is preferred to the (singular)
1702verb sense.
1703
1704Hence C<PL("knife")> will return "knives" ("knife" having been treated
1705as a singular noun), whereas C<PL("knifes")> will return "knife"
1706("knifes" having been treated as a 3rd person singular verb).
1707
1708The inherent ambiguity of such cases suggests that,
1709where the part of speech is known, C<PL_N>, C<PL_V>, and
1710C<PL_ADJ> should be used in preference to C<PL>.
1711
1712=back
1713
1714Note that all these subroutines ignore any whitespace surrounding the
1715word being inflected, but preserve that whitespace when the result is
1716returned. For example, C<S<PL(" cat ")>> returns S<" cats ">.
1717
1718
1719=head2 Numbered plurals
1720
1721The C<PL_...> subroutines return only the inflected word, not the count that
1722was used to inflect it. Thus, in order to produce "I saw 3 ducks", it
1723is necessary to use:
1724
1725 print "I saw $N ", PL_N($animal,$N), "\n";
1726
1727Since the usual purpose of producing a plural is to make it agree with
1728a preceding count, Lingua::EN::Inflect provides an exportable subroutine
1729(C<NO($;$)>) which, given a word and a(n optional) count, returns the
1730count followed by the correctly inflected word. Hence the previous
1731example can be rewritten:
1732
1733 print "I saw ", NO($animal,$N), "\n";
1734
1735In addition, if the count is zero (or some other term which implies
1736zero, such as C<"zero">, C<"nil">, etc.) the count is replaced by the
1737word "no". Hence, if C<$N> had the value zero, the previous example
1738would print the somewhat more elegant:
1739
1740 I saw no animals
1741
1742rather than:
1743
1744 I saw 0 animals
1745
1746Note that the name of the subroutine is a pun: the subroutine
1747returns either a number (a I<No.>) or a C<"no">, in front of the
1748inflected word.
1749
1750
1751=head2 Reducing the number of counts required
1752
1753In some contexts, the need to supply an explicit count to the various
1754C<PL_...> subroutines makes for tiresome repetition. For example:
1755
1756 print PL_ADJ("This",$errors), PL_N(" error",$errors),
1757 PL_V(" was",$errors), " fatal.\n";
1758
1759Lingua::EN::Inflect therefore provides an exportable subroutine
1760(C<NUM($;$)>) which may be used to set a persistent "default number"
1761value. If such a value is set, it is subsequently used whenever an
1762optional second "number" argument is omitted. The default value thus set
1763can subsequently be removed by calling C<NUM()> with no arguments.
1764Hence we could rewrite the previous example:
1765
1766 NUM($errors);
1767 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n";
1768 NUM();
1769
1770Normally, C<NUM()> returns its first argument, so that it may also
1771be "inlined" in contexts like:
1772
1773 print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n"
1774 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n"
1775 if $severity > 1;
1776
1777However, in certain contexts (see L<"INTERPOLATING INFLECTIONS IN STRINGS">)
1778it is preferable that C<NUM()> return an empty string. Hence C<NUM()>
1779provides an optional second argument. If that argument is supplied (that is, if
1780it is defined) and evaluates to false, C<NUM> returns an empty string
1781instead of its first argument. For example:
1782
1783 print NUM($errors,0), NO("error"), PL_V(" was"), " detected.\n";
1784 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n"
1785 if $severity > 1;
1786
1787
1788
1789=head2 Number-insensitive equality
1790
1791Lingua::EN::Inflect also provides a solution to the problem
1792of comparing words of differing plurality through the exportable subroutines
1793C<PL_eq($$)>, C<PL_N_eq($$)>, C<PL_V_eq($$)>, and C<PL_ADJ_eq($$)>.
1794Each of these subroutines takes two strings, and compares them
1795using the corresponding plural-inflection subroutine (C<PL()>, C<PL_N()>,
1796C<PL_V()>, and C<PL_ADJ()> respectively).
1797
1798The comparison returns true if:
1799
1800=over 8
1801
1802=item *
1803
1804the strings are C<eq>-equal, or
1805
1806=item *
1807
1808one string is C<eq>-equal to a plural form of the other, or
1809
1810=item *
1811
1812the strings are two different plural forms of the one word.
1813
1814=back
1815
1816Hence all of the following return true:
1817
1818 PL_eq("index","index") # RETURNS "eq"
1819 PL_eq("index","indexes") # RETURNS "s:p"
1820 PL_eq("index","indices") # RETURNS "s:p"
1821 PL_eq("indexes","index") # RETURNS "p:s"
1822 PL_eq("indices","index") # RETURNS "p:s"
1823 PL_eq("indices","indexes") # RETURNS "p:p"
1824 PL_eq("indexes","indices") # RETURNS "p:p"
1825 PL_eq("indices","indices") # RETURNS "eq"
1826
1827As indicated by the comments in the previous example, the actual value
1828returned by the various C<PL_eq_...> subroutines encodes which of the
1829three equality rules succeeded: "eq" is returned if the strings were
1830identical, "s:p" if the strings were singular and plural respectively,
1831"p:s" for plural and singular, and "p:p" for two distinct plurals.
1832Inequality is indicated by returning an empty string.
1833
1834It should be noted that two distinct singular words which happen to take
1835the same plural form are I<not> considered equal, nor are cases where
1836one (singular) word's plural is the other (plural) word's singular.
1837Hence all of the following return false:
1838
1839 PL_eq("base","basis") # ALTHOUGH BOTH -> "bases"
1840 PL_eq("syrinx","syringe") # ALTHOUGH BOTH -> "syringes"
1841 PL_eq("she","he") # ALTHOUGH BOTH -> "they"
1842
1843 PL_eq("opus","operas") # ALTHOUGH "opus" -> "opera" -> "operas"
1844 PL_eq("taxi","taxes") # ALTHOUGH "taxi" -> "taxis" -> "taxes"
1845
1846Note too that, although the comparison is "number-insensitive" it is I<not>
1847case-insensitive (that is, C<PL("time","Times")> returns false. To obtain
1848both number and case insensitivity, prefix both arguments with C<lc>
1849(that is, C<PL(lc "time", lc "Times")> returns true).
1850
1851
1852=head1 OTHER VERB FORMS
1853
1854=head2 Present participles
1855
1856C<Lingua::EN::Inflect> also provides the C<PART_PRES> subroutine,
1857which can take a 3rd person singular verb and
1858correctly inflect it to its present participle:
1859
1860 PART_PRES("runs") # "running"
1861 PART_PRES("loves") # "loving"
1862 PART_PRES("eats") # "eating"
1863 PART_PRES("bats") # "batting"
1864 PART_PRES("spies") # "spying"
1865
1866
1867=head1 PROVIDING INDEFINITE ARTICLES
1868
1869=head2 Selecting indefinite articles
1870
1871Lingua::EN::Inflect provides two exportable subroutines (C<A($;$)> and
1872C<AN($;$)>) which will correctly prepend the appropriate indefinite
1873article to a word, depending on its pronunciation. For example:
1874
1875 A("cat") # -> "a cat"
1876 AN("cat") # -> "a cat"
1877 A("euphemism") # -> "a euphemism"
1878 A("Euler number") # -> "an Euler number"
1879 A("hour") # -> "an hour"
1880 A("houri") # -> "a houri"
1881
1882The two subroutines are I<identical> in function and may be used
1883interchangeably. The only reason that two versions are provided is to
1884enhance the readability of code such as:
1885
1886 print "That is ", AN($errortype), " error\n;
1887 print "That is ", A($fataltype), " fatal error\n;
1888
1889Note that in both cases the actual article provided depends I<only> on
1890the pronunciation of the first argument, I<not> on the name of the
1891subroutine.
1892
1893C<A()> and C<AN()> will ignore any indefinite article that already
1894exists at the start of the string. Thus:
1895
1896 @half_arked = (
1897 "a elephant",
1898 "a giraffe",
1899 "an ewe",
1900 "a orangutan",
1901 );
1902
1903 print A($_), "\n" for @half_arked;
1904
1905 # prints:
1906 # an elephant
1907 # a giraffe
1908 # a ewe
1909 # an orangutan
1910
1911
1912C<A()> and C<AN()> both take an optional second argument. As with the
1913C<PL_...> subroutines, this second argument is a "number" specifier. If
1914its value is C<1> (or some other value implying singularity), C<A()> and
1915C<AN()> insert "a" or "an" as appropriate. If the number specifier
1916implies plurality, (C<A()> and C<AN()> insert the actual second argument instead.
1917For example:
1918
1919 A("cat",1) # -> "a cat"
1920 A("cat",2) # -> "2 cat"
1921 A("cat","one") # -> "one cat"
1922 A("cat","no") # -> "no cat"
1923
1924Note that, as implied by the previous examples, C<A()> and
1925C<AN()> both assume that their job is merely to provide the correct
1926qualifier for a word (that is: "a", "an", or the specified count).
1927In other words, they assume that the word they are given has
1928already been correctly inflected for plurality. Hence, if C<$N>
1929has the value 2, then:
1930
1931 print A("cat",$N);
1932
1933prints "2 cat", instead of "2 cats". The correct approach is to use:
1934
1935 print A(PL("cat",$N),$N);
1936
1937or, better still:
1938
1939 print NO("cat",$N);
1940
1941Note too that, like the various C<PL_...> subroutines, whenever C<A()>
1942and C<AN()> are called with only one argument they are subject to the
1943effects of any preceding call to C<NUM()>. Hence, another possible
1944solution is:
1945
1946 NUM($N);
1947 print A(PL("cat"));
1948
1949
1950=head2 Indefinite articles and initialisms
1951
1952"Initialisms" (sometimes inaccurately called "acronyms") are terms which
1953have been formed from the initial letters of words in a phrase (for
1954example, "NATO", "NBL", "S.O.S.", "SCUBA", etc.)
1955
1956Such terms present a particular challenge when selecting between "a"
1957and "an", since they are sometimes pronounced as if they were a single
1958word ("nay-tow", "sku-ba") and sometimes as a series of letter names
1959("en-eff-ell", "ess-oh-ess").
1960
1961C<A()> and C<AN()> cope with this dichotomy using a series of inbuilt
1962rules, which may be summarized as:
1963
1964=over 8
1965
1966=item 1.
1967
1968If the word starts with a single letter, followed by a period or dash
1969(for example, "R.I.P.", "C.O.D.", "e-mail", "X-ray", "T-square"), then
1970choose the appropriate article for the I<sound> of the first letter
1971("an R.I.P.", "a C.O.D.", "an e-mail", "an X-ray", "a T-square").
1972
1973=item 2.
1974
1975If the first two letters of the word are capitals,
1976consonants, and do not appear at the start of any known English word,
1977(for example, "LCD", "XML", "YWCA"), then once again choose "a" or
1978"an" depending on the I<sound> of the first letter ("an LCD", "an
1979XML", "a YWCA").
1980
1981=item 3.
1982
1983Otherwise, assume the string is a capitalized word or a
1984pronounceable initialism (for example, "LED", "OPEC", "FAQ", "UNESCO"), and
1985therefore takes "a" or "an" according to the (apparent) pronunciation of
1986the entire word ("a LED", "an OPEC", "a FAQ", "a UNESCO").
1987
1988=back
1989
1990Note that rules 1 and 3 together imply that the presence or absence of
1991punctuation may change the selection of indefinite article for a
1992particular initialism (for example, "a FAQ" but "an F.A.Q.").
1993
1994
1995=head2 Indefinite articles and "soft H's"
1996
1997Words beginning in the letter 'H' present another type of difficulty
1998when selecting a suitable indefinite article. In a few such words
1999(for example, "hour", "honour", "heir") the 'H' is not voiced at
2000all, and so such words inflect with "an". The remaining cases
2001("voiced H's") may be divided into two categories:
2002"hard H's" (such as "hangman", "holograph", "hat", etc.) and
2003"soft H's" (such as "hysterical", "horrendous", "holy", etc.)
2004
2005Hard H's always take "a" as their indefinite article, and soft
2006H's normally do so as well. But I<some> English speakers prefer
2007"an" for soft H's (although the practice is now generally considered an
2008affectation, rather than a legitimate grammatical alternative).
2009
2010At present, the C<A()> and C<AN()> subroutines ignore soft H's and use
2011"a" for any voiced 'H'. The author would, however, welcome feedback on
2012this decision (envisaging a possible future "soft H" mode).
2013
2014
2015=head1 INFLECTING ORDINALS
2016
2017Occasionally it is useful to present an integer value as an ordinal
2018rather than as a numeral. For example:
2019
2020 Enter password (1st attempt): ********
2021 Enter password (2nd attempt): *********
2022 Enter password (3rd attempt): *********
2023 No 4th attempt. Access denied.
2024
2025To this end, Lingua::EN::Inflect provides the C<ORD()> subroutine.
2026<ORD()> takes a single argument and forms its ordinal equivalent.
2027If the argument isn't a numerical integer, it just adds "-th".
2028
2029
2030=head1 CONVERTING NUMBERS TO WORDS
2031
2032The exportable subroutine C<NUMWORDS> takes a number (cardinal or ordinal)
2033and returns an English representation of that number. In a scalar context
2034a string is returned. Hence:
2035
2036 use Lingua::EN::Inflect qw( NUMWORDS );
2037
2038 $words = NUMWORDS(1234567);
2039
2040puts the string:
2041
2042 "one million, two hundred and thirty-four thousand, five hundred and sixty-seven"
2043
2044into $words.
2045
2046In a list context each comma-separated chunk is returned as a separate element.
2047Hence:
2048
2049 @words = NUMWORDS(1234567);
2050
2051puts the list:
2052
2053 ("one million",
2054 "two hundred and thirty-four thousand",
2055 "five hundred and sixty-seven")
2056
2057into @words.
2058
2059Non-digits (apart from an optional leading plus or minus sign,
2060any decimal points, and ordinal suffixes -- see below) are silently
2061ignored, so the following all produce identical results:
2062
2063 NUMWORDS(5551202);
2064 NUMWORDS(5_551_202);
2065 NUMWORDS("5,551,202");
2066 NUMWORDS("555-1202");
2067
2068That last case is a little awkward since it's almost certainly a phone number,
2069and "five million, five hundred and fifty-one thousand, two hundred and two"
2070probably isn't what's wanted.
2071
2072To overcome this, C<NUMWORDS()> takes an optional named argument, 'group',
2073which changes how numbers are translated. The argument must be a
2074positive integer less than four, which indicated how the digits of the
2075number are to be grouped. If the argument is C<1>, then each digit is
2076translated separately. If the argument is C<2>, pairs of digits
2077(starting from the I<left>) are grouped together. If the argument is
2078C<3>, triples of numbers (again, from the I<left>) are grouped. Hence:
2079
2080 NUMWORDS("555-1202", group=>1)
2081
2082returns C<"five, five, five, one, two, zero, two">, whilst:
2083
2084 NUMWORDS("555-1202", group=>2)
2085
2086returns C<"fifty-five, fifty-one, twenty, two">, and:
2087
2088 NUMWORDS("555-1202", group=>3)
2089
2090returns C<"five fifty-five, one twenty, two">.
2091
2092Phone numbers are often written in words as
2093C<"five..five..five..one..two..zero..two">, which is also easy to
2094achieve:
2095
2096 join '..', NUMWORDS("555-1202", group=>1)
2097
2098C<NUMWORDS> also handles decimal fractions. Hence:
2099
2100 NUMWORDS("1.2345")
2101
2102returns C<"one point two three four five"> in a scalar context
2103and C<("one","point","two","three","four","five")>) in an array context.
2104Exponent form (C<"1.234e56">) is not yet handled.
2105
2106Multiple decimal points are only translated in one of the "grouping" modes.
2107Hence:
2108
2109 NUMWORDS(101.202.303)
2110
2111returns C<"one hundred and one point two zero two three zero three">,
2112whereas:
2113
2114 NUMWORDS(101.202.303, group=>1)
2115
2116returns C<"one zero one point two zero two point three zero three">.
2117
2118The digit C<'0'> is unusual in that in may be translated to English as "zero",
2119"oh", or "nought". To cater for this diversity, C<NUMWORDS> may be passed
2120a named argument, 'zero', which may be set to
2121the desired translation of C<'0'>. For example:
2122
2123 print join "..", NUMWORDS("555-1202", group=>3, zero=>'oh')
2124
2125prints C<"five..five..five..one..two..oh..two">.
2126By default, zero is rendered as "zero".
2127
2128Likewise, the digit C<'1'> may be rendered as "one" or "a/an" (or very
2129occasionally other variants), depending on the context. So there is a
2130C<'one'> argument as well:
2131
2132 print NUMWORDS($_, one=>'a solitary', zero=>'no more'),
2133 PL(" bottle of beer on the wall\n", $_)
2134 for (3,2,1,0);
2135
2136 # prints:
2137 # three bottles of beer on the wall
2138 # two bottles of beer on the wall
2139 # a solitary bottle of beer on the wall
2140 # no more bottles of beer on the wall
2141
2142Care is needed if the word "a/an" is to be used as a C<'one'> value.
2143Unless the next word is known in advance, it's almost always necessary
2144to use the C<A> function as well:
2145
2146 print A( NUMWORDS(1, one=>'a') . " $_\n")
2147 for qw(cat aardvark ewe hour);
2148
2149 # prints:
2150 # a cat
2151 # an aardvark
2152 # a ewe
2153 # an hour
2154
2155Another major regional variation in number translation is the use of
2156"and" in certain contexts. The named argument 'and'
2157allows the programmer to specify how "and" should be handled. Hence:
2158
2159 print scalar NUMWORDS("765", 'and'=>'')
2160
2161prints "seven hundred sixty-five", instead of "seven hundred and sixty-five".
2162By default, the "and" is included.
2163
2164The translation of the decimal point is also subject to variation
2165(with "point", "dot", and "decimal" being the favorites).
2166The named argument 'decimal' allows the
2167programmer to how the decimal point should be rendered. Hence:
2168
2169 print scalar NUMWORDS("666.124.64.101", group=>3, decimal=>'dot')
2170
2171prints "six sixty-six, dot, one twenty-four, dot, sixty-four, dot, one zero one"
2172By default, the decimal point is rendered as "point".
2173
2174C<NUMWORDS> also handles the ordinal forms of numbers. So:
2175
2176 print scalar NUMWORDS('1st');
2177 print scalar NUMWORDS('3rd');
2178 print scalar NUMWORDS('202nd');
2179 print scalar NUMWORDS('1000000th');
2180
2181print:
2182
2183 first
2184 third
2185 two hundred and twenty-second
2186 one millionth
2187
2188Two common idioms in this regard are:
2189
2190 print scalar NUMWORDS(ORD($number));
2191
2192and:
2193
2194 print scalar ORD(NUMWORDS($number));
2195
2196These are identical in effect, except when $number contains a decimal:
2197
2198 $number = 99.09;
2199 print scalar NUMWORDS(ORD($number)); # ninety-ninth point zero nine
2200 print scalar ORD(NUMWORDS($number)); # ninety-nine point zero ninth
2201
2202Use whichever you feel is most appropriate.
2203
2204
2205=head1 INTERPOLATING INFLECTIONS IN STRINGS
2206
2207By far the commonest use of the inflection subroutines is to
2208produce message strings for various purposes. For example:
2209
2210 print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
2211 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n"
2212 if $severity > 1;
2213
2214Unfortunately the need to separate each subroutine call detracts
2215significantly from the readability of the resulting code. To ameliorate
2216this problem, Lingua::EN::Inflect provides an exportable string-interpolating
2217subroutine (C<inflect($)>), which recognizes calls to the various inflection
2218subroutines within a string and interpolates them appropriately.
2219
2220Using C<inflect> the previous example could be rewritten:
2221
2222 print inflect "NUM($errors) PL_N(error) PL_V(was) detected.\n";
2223 print inflect "PL_ADJ(This) PL_N(error) PL_V(was) fatal.\n"
2224 if $severity > 1;
2225
2226Note that C<inflect> also correctly handles calls to the C<NUM()> subroutine
2227(whether interpolated or antecedent). The C<inflect()> subroutine has
2228a related extra feature, in that it I<automatically> cancels any "default
2229number" value before it returns its interpolated string. This means that
2230calls to C<NUM()> which are embedded in an C<inflect()>-interpolated
2231string do not "escape" and interfere with subsequent inflections.
2232
2233
2234=head1 MODERN VS CLASSICAL INFLECTIONS
2235
2236Certain words, mainly of Latin or Ancient Greek origin, can form
2237plurals either using the standard English "-s" suffix, or with
2238their original Latin or Greek inflections. For example:
2239
2240 PL("stigma") # -> "stigmas" or "stigmata"
2241 PL("torus") # -> "toruses" or "tori"
2242 PL("index") # -> "indexes" or "indices"
2243 PL("millennium") # -> "millenniums" or "millennia"
2244 PL("ganglion") # -> "ganglions" or "ganglia"
2245 PL("octopus") # -> "octopuses" or "octopodes"
2246
2247
2248Lingua::EN::Inflect caters to such words by providing an
2249"alternate state" of inflection known as "classical mode".
2250By default, words are inflected using their contemporary English
2251plurals, but if classical mode is invoked, the more traditional
2252plural forms are returned instead.
2253
2254The exportable subroutine C<classical()> controls this feature.
2255If C<classical()> is called with no arguments, it unconditionally
2256invokes classical mode. If it is called with a single argument, it
2257turns all classical inflects on or off (depending on whether the argument is
2258true or false). If called with two or more arguments, those arguments
2259specify which aspects of classical behaviour are to be used.
2260
2261Thus:
2262
2263 classical; # SWITCH ON CLASSICAL MODE
2264 print PL("formula"); # -> "formulae"
2265
2266 classical 0; # SWITCH OFF CLASSICAL MODE
2267 print PL("formula"); # -> "formulas"
2268
2269 classical $cmode; # CLASSICAL MODE IFF $cmode
2270 print PL("formula"); # -> "formulae" (IF $cmode)
2271 # -> "formulas" (OTHERWISE)
2272
2273 classical herd=>1; # SWITCH ON CLASSICAL MODE FOR "HERD" NOUNS
2274 print PL("wilderbeest"); # -> "wilderbeest"
2275
2276 classical names=>1; # SWITCH ON CLASSICAL MODE FOR NAMES
2277 print PL("sally"); # -> "sallies"
2278 print PL("Sally"); # -> "Sallys"
2279
2280Note however that C<classical()> has no effect on the inflection of words which
2281are now fully assimilated. Hence:
2282
2283 PL("forum") # ALWAYS -> "forums"
2284 PL("criterion") # ALWAYS -> "criteria"
2285
2286LEI assumes that a capitalized word is a person's name. So it forms the
2287plural according to the rules for names (which is that you don't
2288inflect, you just add -s or -es). You can choose to turn that behaviour
2289off (it's on by the default, even when the module isn't in classical
2290mode) by calling C< classical(names=>0) >;
2291
2292=head1 USER-DEFINED INFLECTIONS
2293
2294=head2 Adding plurals at run-time
2295
2296Lingua::EN::Inflect provides five exportable subroutines which allow
2297the programmer to override the module's behaviour for specific cases:
2298
2299=over 8
2300
2301=item C<def_noun($$)>
2302
2303The C<def_noun> subroutine takes a pair of string arguments: the singular and
2304plural forms of the noun being specified. The singular form
2305specifies a pattern to be interpolated (as C<m/^(?:$first_arg)$/i>).
2306Any noun matching this pattern is then replaced by the string in the
2307second argument. The second argument specifies a string which is
2308interpolated after the match succeeds, and is then used as the plural
2309form. For example:
2310
2311 def_noun 'cow' => 'kine';
2312 def_noun '(.+i)o' => '$1i';
2313 def_noun 'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!';
2314
2315Note that both arguments should usually be specified in single quotes,
2316so that they are not interpolated when they are specified, but later (when
2317words are compared to them). As indicated by the last example, care
2318also needs to be taken with certain characters in the second argument,
2319to ensure that they are not unintentionally interpolated during comparison.
2320
2321The second argument string may also specify a second variant of the plural
2322form, to be used when "classical" plurals have been requested. The beginning
2323of the second variant is marked by a '|' character:
2324
2325 def_noun 'cow' => 'cows|kine';
2326 def_noun '(.+i)o' => '$1os|$1i';
2327 def_noun 'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!|varmints';
2328
2329If no classical variant is given, the specified plural form is used in
2330both normal and "classical" modes.
2331
2332If the second argument is C<undef> instead of a string, then the
2333current user definition for the first argument is removed, and the
2334standard plural inflection(s) restored.
2335
2336Note that in all cases, later plural definitions for a particular
2337singular form replace earlier definitions of the same form. For example:
2338
2339 # FIRST, HIDE THE MODERN FORM....
2340 def_noun 'aviatrix' => 'aviatrices';
2341
2342 # LATER, HIDE THE CLASSICAL FORM...
2343 def_noun 'aviatrix' => 'aviatrixes';
2344
2345 # FINALLY, RESTORE THE DEFAULT BEHAVIOUR...
2346 def_noun 'aviatrix' => undef;
2347
2348
2349Special care is also required when defining general patterns and
2350associated specific exceptions: put the more specific cases I<after>
2351the general pattern. For example:
2352
2353 def_noun '(.+)us' => '$1i'; # EVERY "-us" TO "-i"
2354 def_noun 'bus' => 'buses'; # EXCEPT FOR "bus"
2355
2356This "try-most-recently-defined-first" approach to matching
2357user-defined words is also used by C<def_verb>, C<def_a> and C<def_an>.
2358
2359
2360=item C<def_verb($$$$$$)>
2361
2362The C<def_verb> subroutine takes three pairs of string arguments (that is, six
2363arguments in total), specifying the singular and plural forms of the three
2364"persons" of verb. As with C<def_noun>, the singular forms are specifications of
2365run-time-interpolated patterns, whilst the plural forms are specifications of
2366(up to two) run-time-interpolated strings:
2367
2368 def_verb 'am' => 'are',
2369 'are' => 'are|art",
2370 'is' => 'are';
2371
2372 def_verb 'have' => 'have',
2373 'have' => 'have",
2374 'ha(s|th)' => 'have';
2375
2376Note that as with C<def_noun>, modern/classical variants of plurals
2377may be separately specified, subsequent definitions replace previous
2378ones, and C<undef>'ed plural forms revert to the standard behaviour.
2379
2380
2381=item C<def_adj($$)>
2382
2383The C<def_adj> subroutine takes a pair of string arguments, which specify
2384the singular and plural forms of the adjective being defined.
2385As with C<def_noun> and C<def_adj>, the singular forms are specifications of
2386run-time-interpolated patterns, whilst the plural forms are specifications of
2387(up to two) run-time-interpolated strings:
2388
2389 def_adj 'this' => 'these',
2390 def_adj 'red' => 'red|gules',
2391
2392As previously, modern/classical variants of plurals
2393may be separately specified, subsequent definitions replace previous
2394ones, and C<undef>'ed plural forms revert to the standard behaviour.
2395
2396
2397=item C<def_a($)> and C<def_an($)>
2398
2399The C<def_a> and C<def_an> subroutines each take a single argument, which
2400specifies a pattern. If a word passed to C<A()> or C<AN()> matches this
2401pattern, it will be prefixed (unconditionally) with the corresponding indefinite
2402article. For example:
2403
2404 def_a 'error';
2405 def_a 'in.+';
2406
2407 def_an 'mistake';
2408 def_an 'error';
2409
2410As with the other C<def_...> subroutines, such redefinitions are sequential
2411in effect so that, after the above example, "error" will be inflected with "an".
2412
2413=back
2414
2415=head2 The F<$HOME/.inflectrc> file
2416
2417When it is imported, Lingua::EN::Inflect executes (as Perl code)
2418the contents of any file named F<.inflectrc> which it finds in the
2419in the directory where F<Lingua/EN/Inflect.pm> is installed,
2420or in the current home directory (C<$ENV{HOME}>), or in both.
2421Note that the code is executed within the Lingua::EN::Inflect
2422namespace.
2423
2424Hence the user or the local Perl guru can make appropriate calls to
2425C<def_noun>, C<def_verb>, etc. in one of these F<.inflectrc> files, to
2426permanently and universally modify the behaviour of the module. For example
2427
2428 > cat /usr/local/lib/perl5/Text/Inflect/.inflectrc
2429
2430 def_noun "UNIX" => "UN*X|UNICES";
2431
2432 def_verb "teco" => "teco", # LITERALLY: "to edit with TECO"
2433 "teco" => "teco",
2434 "tecos" => "teco";
2435
2436 def_a "Euler.*"; # "Yewler" TURNS IN HIS GRAVE
2437
2438
2439Note that calls to the C<def_...> subroutines from within a program
2440will take precedence over the contents of the home directory
2441F<.inflectrc> file, which in turn takes precedence over the system-wide
2442F<.inflectrc> file.
2443
2444
2445=head1 DIAGNOSTICS
2446
2447On loading, if the Perl code in a F<.inflectrc> file is invalid
2448(syntactically or otherwise), an appropriate fatal error is issued.
2449A common problem is not ending the file with something that
2450evaluates to true (as the five C<def_...> subroutines do).
2451
2452Using the five C<def_...> subroutines directly in a program may also
2453result in fatal diagnostics, if a (singular) pattern or an interpolated
2454(plural) string is somehow invalid.
2455
2456Specific diagnostics related to user-defined inflections are:
2457
2458=over 8
2459
2460=item C<"Bad user-defined singular pattern:\n\t %s">
2461
2462The singular form of a user-defined noun or verb
2463(as defined by a call to C<def_noun>, C<def_verb>, C<def_adj>,
2464C<def_a> or C<def_an>) is not a valid Perl regular expression. The
2465actual Perl error message is also given.
2466
2467=item C<"Bad user-defined plural string: '%s'">
2468
2469The plural form(s) of a user-defined noun or verb
2470(as defined by a call to C<def_noun>, C<def_verb> or C<def_adj>)
2471is not a valid Perl interpolated string (usually because it
2472interpolates some undefined variable).
2473
2474=item C<"Bad .inflectrc file (%s):\n %s">
2475
2476Some other problem occurred in loading the named local
2477or global F<.inflectrc> file. The Perl error message (including
2478the line number) is also given.
2479
2480=back
2481
2482There are I<no> diagnosable run-time error conditions for the actual
2483inflection subroutines, except C<NUMWORDS> and hence no run-time
2484diagnostics. If the inflection subroutines are unable to form a plural
2485via a user-definition or an inbuilt rule, they just "guess" the
2486commonest English inflection: adding "-s" for nouns, removing "-s" for
2487verbs, and no inflection for adjectives.
2488
2489C<Lingua::EN::Inflect::NUMWORDS()> can C<die> with the following messages:
2490
2491=over 8
2492
2493=item C<"Bad grouping option: %s">
2494
2495The optional argument to C<NUMWORDS()> wasn't 1, 2 or 3.
2496
2497=item C<"Number out of range">
2498
2499C<NUMWORDS()> was passed a number larger than
2500999,999,999,999,999,999,999,999,999,999,999,999 (that is: nine hundred
2501and ninety-nine decillion, nine hundred and ninety-nine nonillion, nine
2502hundred and ninety-nine octillion, nine hundred and ninety-nine
2503septillion, nine hundred and ninety-nine sextillion, nine hundred and
2504ninety-nine quintillion, nine hundred and ninety-nine quadrillion, nine
2505hundred and ninety-nine trillion, nine hundred and ninety-nine billion,
2506nine hundred and ninety-nine million, nine hundred and ninety-nine
2507thousand, nine hundred and ninety-nine :-)
2508
2509The problem is that C<NUMWORDS> doesn't know any
2510words for number components bigger than "decillion".
2511
2512
2513=head1 OTHER ISSUES
2514
2515=head2 2nd Person precedence
2516
2517If a verb has identical 1st and 2nd person singular forms, but
2518different 1st and 2nd person plural forms, then when its plural is
2519constructed, the 2nd person plural form is always preferred.
2520
2521The author is not currently aware of any such verbs in English, but is
2522not quite arrogant enough to assume I<ipso facto> that none exist.
2523
2524
2525=head2 Nominative precedence
2526
2527The singular pronoun "it" presents a special problem because its plural form
2528can vary, depending on its "case". For example:
2529
2530 It ate my homework -> They ate my homework
2531 It ate it -> They ate them
2532 I fed my homework to it -> I fed my homework to them
2533
2534As a consequence of this ambiguity, C<PL()> or C<PL_N> have been implemented
2535so that they always return the I<nominative> plural (that is, "they").
2536
2537However, when asked for the plural of an unambiguously I<accusative>
2538"it" (namely, C<PL("to it")>, C<PL_N("from it")>, C<PL("with it")>,
2539etc.), both subroutines will correctly return the accusative plural
2540("to them", "from them", "with them", etc.)
2541
2542
2543=head2 The plurality of zero
2544
2545The rules governing the choice between:
2546
2547 There were no errors.
2548
2549and
2550
2551 There was no error.
2552
2553are complex and often depend more on I<intent> rather than I<content>.
2554Hence it is infeasible to specify such rules algorithmically.
2555
2556Therefore, Lingua::EN::Text contents itself with the following compromise: If
2557the governing number is zero, inflections always return the plural form
2558unless the appropriate "classical" inflection is in effect, in which case the
2559singular form is always returned.
2560
2561Thus, the sequence:
2562
2563 NUM(0);
2564 print inflect "There PL(was) NO(choice)";
2565
2566produces "There were no choices", whereas:
2567
2568 classical 'zero'; # or: classical(zero=>1);
2569 NUM(0);
2570 print inflect "There PL(was) NO(choice)";
2571
2572it will print "There was no choice".
2573
2574
2575=head2 Homographs with heterogeneous plurals
2576
2577Another context in which intent (and not content) sometimes determines
2578plurality is where two distinct meanings of a word require different
2579plurals. For example:
2580
2581 Three basses were stolen from the band's equipment trailer.
2582 Three bass were stolen from the band's aquarium.
2583
2584 I put the mice next to the cheese.
2585 I put the mouses next to the computers.
2586
2587 Several thoughts about leaving crossed my mind.
2588 Several thought about leaving across my lawn.
2589
2590Lingua::EN::Inflect handles such words in two ways:
2591
2592=over 8
2593
2594=item *
2595
2596If both meanings of the word are the I<same> part of speech (for
2597example, "bass" is a noun in both sentences above), then one meaning
2598is chosen as the "usual" meaning, and only that meaning's plural is
2599ever returned by any of the inflection subroutines.
2600
2601=item *
2602
2603If each meaning of the word is a different part of speech (for
2604example, "thought" is both a noun and a verb), then the noun's
2605plural is returned by C<PL()> and C<PL_N()> and the verb's plural is
2606returned only by C<PL_V()>.
2607
2608=back
2609
2610Such contexts are, fortunately, uncommon (particularly
2611"same-part-of-speech" examples). An informal study of nearly 600
2612"difficult plurals" indicates that C<PL()> can be relied upon to "get
2613it right" about 98% of the time (although, of course, ichthyophilic
2614guitarists or cyber-behaviouralists may experience higher rates of
2615confusion).
2616
2617If the choice of a particular "usual inflection" is considered
2618inappropriate, it can always be reversed with a preliminary call
2619to the corresponding C<def_...> subroutine.
2620
2621=head1 NOTE
2622
2623I'm not taking any further correspondence on:
2624
2625=over
2626
2627=item "octopi".
2628
2629Despite the populist pandering of certain New World dictionaries, the
2630plural is "octopuses" or (for the pendantic classicist) "octopodes". The
2631suffix "-pus" is Greek, not Latin, so the plural is "-podes", not "pi".
2632
2633
2634=item "virus".
2635
2636Had no plural in Latin (possibly because it was a mass noun).
2637The only plural is the Anglicized "viruses".
2638
2639=back
2640
2641=head1 AUTHORS
2642
2643Damian Conway (damian@conway.org)
2644Matthew Persico (ORD inflection)
2645
2646
2647=head1 BUGS AND IRRITATIONS
2648
2649The endless inconsistencies of English.
2650
2651(I<Please> report words for which the correct plural or
2652indefinite article is not formed, so that the reliability
2653of Lingua::EN::Inflect can be improved.)
2654
2655
2656
2657=head1 COPYRIGHT
2658
2659 Copyright (c) 1997-2000, Damian Conway. All Rights Reserved.
2660 This module is free software. It may be used, redistributed
2661 and/or modified under the same terms as Perl itself.