1 package Lingua::EN::Inflect;
4 use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA);
14 ALL => [ qw( classical inflect
15 PL PL_N PL_V PL_ADJ NO NUM A AN
16 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
20 def_noun def_verb def_adj def_a def_an )],
22 INFLECTIONS => [ qw( classical inflect
23 PL PL_N PL_V PL_ADJ PL_eq
24 NO NUM A AN PART_PRES )],
26 PLURALS => [ qw( classical inflect
27 PL PL_N PL_V PL_ADJ NO NUM
28 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )],
30 COMPARISONS => [ qw( classical
31 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )],
33 ARTICLES => [ qw( classical inflect NUM A AN )],
35 NUMERICAL => [ qw( ORD NUMWORDS )],
37 USER_DEFINED => [ qw( def_noun def_verb def_adj def_a def_an )],
40 Exporter::export_ok_tags(qw( ALL ));
42 # SUPPORT CLASSICAL PLURALIZATIONS
62 my %classical = %def_classical;
64 my $classical_mode = join '|', keys %all_classical;
65 $classical_mode = qr/^(?:$classical_mode)$/;
70 %classical = %all_classical;
73 if (@_==1 && $_[0] !~ $classical_mode) {
74 %classical = $_[0] ? %all_classical : ();
79 if ($arg !~ $classical_mode) {
80 die "Unknown classical mode ($arg)\n";
82 if (@_ && $_[0] !~ $classical_mode) { $classical{$arg} = shift; }
83 else { $classical{$arg} = 1; }
86 %classical = $classical{all} ? %all_classical : ();
93 sub NUM # (;$count,$show)
97 $persistent_count = $_[0];
98 return $_[0] if !defined($_[1]) || $_[1];
102 $persistent_count = undef;
108 # 0. PERFORM GENERAL INFLECTIONS IN A STRING
# Wrap a regex fragment in a non-capturing group, so that an alternation
# built with join('|', ...) can be embedded safely inside a larger pattern.
sub enclose
{
    my ($pattern) = @_;
    return "(?:$pattern)";
}
114 my $save_persistent_count = $persistent_count;
115 my @sections = split /(NUM\([^)]*\))/, $_[0];
118 foreach ( @sections )
120 unless (s/NUM\(\s*?(?:([^),]*)(?:,([^)]*))?)?\)/ NUM($1,$2) /xe)
123 s/\bPL \( ([^),]*) (, ([^)]*) )? \) / PL($1,$3) /xeg
124 || s/\bPL_N \( ([^),]*) (, ([^)]*) )? \) / PL_N($1,$3) /xeg
125 || s/\bPL_V \( ([^),]*) (, ([^)]*) )? \) / PL_V($1,$3) /xeg
126 || s/\bPL_ADJ \( ([^),]*) (, ([^)]*) )? \) / PL_ADJ($1,$3) /xeg
127 || s/\bAN? \( ([^),]*) (, ([^)]*) )? \) / A($1,$3) /xeg
128 || s/\bNO \( ([^),]*) (, ([^)]*) )? \) / NO($1,$3) /xeg
129 || s/\bORD \( ([^)]*) \) / ORD($1) /xeg
130 || s/\bNUMWORDS \( ([^)]*) \) / NUMWORDS($1) /xeg
131 || s/\bPART_PRES \( ([^)]*) \) / PART_PRES($1) /xeg
137 $persistent_count = $save_persistent_count;
144 my %PL_sb_irregular_s =
146 "corpus" => "corpuses|corpora",
147 "opus" => "opuses|opera",
149 "mythos" => "mythoi",
150 "penis" => "penises|penes",
151 "testis" => "testes",
152 "atlas" => "atlases|atlantes",
155 my %PL_sb_irregular =
157 "child" => "children",
158 "brother" => "brothers|brethren",
160 "hoof" => "hoofs|hooves",
161 "beef" => "beefs|beeves",
163 "mongoose" => "mongooses",
165 "cow" => "cows|kine",
166 "soliloquy" => "soliloquies",
167 "graffito" => "graffiti",
168 "prima donna" => "prima donnas|prime donne",
169 "octopus" => "octopuses|octopodes",
170 "genie" => "genies|genii",
171 "ganglion" => "ganglions|ganglia",
172 "trilby" => "trilbys",
173 "turf" => "turfs|turves",
176 "occiput" => "occiputs|occipita",
181 my $PL_sb_irregular = enclose join '|', keys %PL_sb_irregular;
183 # CLASSICAL "..is" -> "..ides"
185 my @PL_sb_C_is_ides =
189 "ephemeris", "iris", "clitoris",
190 "chrysalis", "epididymis",
198 my $PL_sb_C_is_ides = enclose join "|", map { substr($_,0,-2) } @PL_sb_C_is_ides;
200 # CLASSICAL "..a" -> "..ata"
204 "anathema", "bema", "carcinoma", "charisma", "diploma",
205 "dogma", "drama", "edema", "enema", "enigma", "lemma",
206 "lymphoma", "magma", "melisma", "miasma", "oedema",
207 "sarcoma", "schema", "soma", "stigma", "stoma", "trauma",
211 my $PL_sb_C_a_ata = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_a_ata;
213 # UNCONDITIONAL "..a" -> "..ae"
215 my $PL_sb_U_a_ae = enclose join "|",
217 "alumna", "alga", "vertebra", "persona"
220 # CLASSICAL "..a" -> "..ae"
222 my $PL_sb_C_a_ae = enclose join "|",
224 "amoeba", "antenna", "formula", "hyperbola",
225 "medusa", "nebula", "parabola", "abscissa",
226 "hydra", "nova", "lacuna", "aurora", ".*umbra",
230 # CLASSICAL "..en" -> "..ina"
232 my $PL_sb_C_en_ina = enclose join "|", map { substr($_,0,-2) }
234 "stamen", "foramen", "lumen"
237 # UNCONDITIONAL "..um" -> "..a"
239 my $PL_sb_U_um_a = enclose join "|", map { substr($_,0,-2) }
241 "bacterium", "agendum", "desideratum", "erratum",
242 "stratum", "datum", "ovum", "extremum",
246 # CLASSICAL "..um" -> "..a"
248 my $PL_sb_C_um_a = enclose join "|", map { substr($_,0,-2) }
250 "maximum", "minimum", "momentum", "optimum",
251 "quantum", "cranium", "curriculum", "dictum",
252 "phylum", "aquarium", "compendium", "emporium",
# "encomium" is the correct spelling (classical plural "encomia"); the
# historical misspelling "enconium" is retained so existing callers that
# relied on it keep getting the same result.
"encomium", "enconium", "gymnasium", "honorarium", "interregnum",
254 "lustrum", "memorandum", "millennium", "rostrum",
255 "spectrum", "speculum", "stadium", "trapezium",
256 "ultimatum", "medium", "vacuum", "velum",
# UNCONDITIONAL "..us" -> "..i"
262 my $PL_sb_U_us_i = enclose join "|", map { substr($_,0,-2) }
264 "alumnus", "alveolus", "bacillus", "bronchus",
265 "locus", "nucleus", "stimulus", "meniscus",
268 # CLASSICAL "..us" -> "..i"
270 my $PL_sb_C_us_i = enclose join "|", map { substr($_,0,-2) }
272 "focus", "radius", "genius",
273 "incubus", "succubus", "nimbus",
274 "fungus", "nucleolus", "stylus",
275 "torus", "umbilicus", "uterus",
279 # CLASSICAL "..us" -> "..us" (ASSIMILATED 4TH DECLENSION LATIN NOUNS)
281 my $PL_sb_C_us_us = enclose join "|",
283 "status", "apparatus", "prospectus", "sinus",
284 "hiatus", "impetus", "plexus",
# UNCONDITIONAL "..on" -> "..a"
289 my $PL_sb_U_on_a = enclose join "|", map { substr($_,0,-2) }
291 "criterion", "perihelion", "aphelion",
292 "phenomenon", "prolegomenon", "noumenon",
293 "organon", "asyndeton", "hyperbaton",
296 # CLASSICAL "..on" -> "..a"
298 my $PL_sb_C_on_a = enclose join "|", map { substr($_,0,-2) }
303 # CLASSICAL "..o" -> "..i" (BUT NORMALLY -> "..os")
307 "solo", "soprano", "basso", "alto",
308 "contralto", "tempo", "piano", "virtuoso",
310 my $PL_sb_C_o_i = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_o_i;
312 # ALWAYS "..o" -> "..os"
314 my $PL_sb_U_o_os = enclose join "|",
316 "albino", "archipelago", "armadillo",
317 "commando", "crescendo", "fiasco",
318 "ditto", "dynamo", "embryo",
319 "ghetto", "guano", "inferno",
320 "jumbo", "lumbago", "magneto",
321 "manifesto", "medico", "octavo",
322 "photo", "pro", "quarto",
323 "canto", "lingo", "generalissimo",
324 "stylo", "rhino", "casino",
325 "auto", "macro", 'zero',
331 # UNCONDITIONAL "..[ei]x" -> "..ices"
333 my $PL_sb_U_ex_ices = enclose join "|", map { substr($_,0,-2) }
335 "codex", "murex", "silex",
338 my $PL_sb_U_ix_ices = enclose join "|", map { substr($_,0,-2) }
343 # CLASSICAL "..[ei]x" -> "..ices"
345 my $PL_sb_C_ex_ices = enclose join "|", map { substr($_,0,-2) }
347 "vortex", "vertex", "cortex", "latex",
348 "pontifex", "apex", "index", "simplex",
351 my $PL_sb_C_ix_ices = enclose join "|", map { substr($_,0,-2) }
356 # ARABIC: ".." -> "..i"
358 my $PL_sb_C_i = enclose join "|",
360 "afrit", "afreet", "efreet",
363 # HEBREW: ".." -> "..im"
365 my $PL_sb_C_im = enclose join "|",
367 "goy", "seraph", "cherub",
370 # UNCONDITIONAL "..man" -> "..mans"
372 my $PL_sb_U_man_mans = enclose join "|",
375 Alabaman Bahaman Burman German
376 Hiroshiman Liman Nakayaman Oklahoman
377 Panaman Selman Sonaman Tacoman Yakiman
381 my @PL_sb_uninflected_s =
383 # PAIRS OR GROUPS SUBSUMED TO A SINGULAR...
384 "breeches", "britches", "clippers", "gallows", "hijinks",
385 "headquarters", "pliers", "scissors", "testes", "herpes",
386 "pincers", "shears", "proceedings", "trousers",
388 # UNASSIMILATED LATIN 4th DECLENSION
390 "cantus", "coitus", "nexus",
393 "contretemps", "corps", "debris",
397 ".*measles", "mumps",
399 # MISCELLANEOUS OTHERS...
400 "diabetes", "jackanapes", "series", "species", "rabies",
401 "chassis", "innings", "news", "mews",
404 my $PL_sb_uninflected_herd = enclose join "|",
405 # DON'T INFLECT IN CLASSICAL MODE, OTHERWISE NORMAL INFLECTION
407 "wildebeest", "swine", "eland", "bison", "buffalo",
408 "elk", "moose", "rhinoceros",
411 my $PL_sb_uninflected = enclose join "|",
413 # SOME FISH AND HERD ANIMALS
414 ".*fish", "tuna", "salmon", "mackerel", "trout",
415 "bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
419 # ALL NATIONALS ENDING IN -ese
420 "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
421 "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
422 "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
423 "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
424 "Shavese", "Vermontese", "Wenchowese", "Yengeese",
427 # SOME WORDS ENDING IN ...s (OFTEN PAIRS TAKEN AS A WHOLE)
429 @PL_sb_uninflected_s,
439 # SINGULAR WORDS ENDING IN ...s (ALL INFLECT WITH ...es)
441 my $PL_sb_singular_s = enclose join '|',
444 "acropolis", "aegis", "alias", "asbestos", "bathos", "bias",
445 "bronchitis", "bursitis", "caddis", "cannabis",
446 "canvas", "chaos", "cosmos", "dais", "digitalis",
447 "epidermis", "ethos", "eyas", "gas", "glottis",
448 "hubris", "ibis", "lens", "mantis", "marquis", "metropolis",
449 "pathos", "pelvis", "polis", "rhinoceros",
450 "sassafras", "trellis", ".*us", "[A-Z].*es",
455 my $PL_v_special_s = enclose join '|',
458 @PL_sb_uninflected_s,
459 keys %PL_sb_irregular_s,
465 my %PL_sb_postfix_adj = (
466 'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
467 'martial' => [qw(court)],
470 foreach (keys %PL_sb_postfix_adj) {
471 $PL_sb_postfix_adj{$_} = enclose
472 enclose(join('|', @{$PL_sb_postfix_adj{$_}}))
473 . "(?=(?:-|\\s+)$_)";
476 my $PL_sb_postfix_adj = '(' . join('|', values %PL_sb_postfix_adj) . ')(.*)';
478 my $PL_sb_military = 'major|lieutenant|brigadier|adjutant|quartermaster';
479 my $PL_sb_general = '((?!'.$PL_sb_military.').*?)((-|\s+)general)';
481 my $PL_prep = enclose join '|', qw (
482 about above across after among around at athwart before behind
483 below beneath beside besides between betwixt beyond but by
484 during except for from in into near of off on onto out over
485 since till to under until unto upon with
488 my $PL_sb_prep_dual_compound = '(.*?)((?:-|\s+)(?:'.$PL_prep.'|d[eu])(?:-|\s+))a(?:-|\s+)(.*)';
490 my $PL_sb_prep_compound = '(.*?)((-|\s+)('.$PL_prep.'|d[eu])((-|\s+)(.*))?)';
495 # NOMINATIVE REFLEXIVE
497 "i" => "we", "myself" => "ourselves",
498 "you" => "you", "yourself" => "yourselves",
499 "she" => "they", "herself" => "themselves",
500 "he" => "they", "himself" => "themselves",
501 "it" => "they", "itself" => "themselves",
502 "they" => "they", "themself" => "themselves",
511 "theirs" => "theirs",
516 # ACCUSATIVE REFLEXIVE
518 "me" => "us", "myself" => "ourselves",
519 "you" => "you", "yourself" => "yourselves",
520 "her" => "them", "herself" => "themselves",
521 "him" => "them", "himself" => "themselves",
522 "it" => "them", "itself" => "themselves",
523 "them" => "them", "themself" => "themselves",
526 my $PL_pron_acc = enclose join '|', keys %PL_pron_acc;
528 my %PL_v_irregular_pres =
530 # 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR
533 "am" => "are", "are" => "are", "is" => "are",
534 "was" => "were", "were" => "were", "was" => "were",
535 "have" => "have", "have" => "have", "has" => "have",
536 "do" => "do", "do" => "do", "does" => "do",
539 my $PL_v_irregular_pres = enclose join '|', keys %PL_v_irregular_pres;
541 my %PL_v_ambiguous_pres =
543 # 1st PERS. SING. 2ND PERS. SING. 3RD PERS. SINGULAR
546 "act" => "act", "act" => "act", "acts" => "act",
547 "blame" => "blame", "blame" => "blame", "blames" => "blame",
548 "can" => "can", "can" => "can", "can" => "can",
549 "must" => "must", "must" => "must", "must" => "must",
550 "fly" => "fly", "fly" => "fly", "flies" => "fly",
551 "copy" => "copy", "copy" => "copy", "copies" => "copy",
552 "drink" => "drink", "drink" => "drink", "drinks" => "drink",
553 "fight" => "fight", "fight" => "fight", "fights" => "fight",
554 "fire" => "fire", "fire" => "fire", "fires" => "fire",
555 "like" => "like", "like" => "like", "likes" => "like",
556 "look" => "look", "look" => "look", "looks" => "look",
557 "make" => "make", "make" => "make", "makes" => "make",
558 "reach" => "reach", "reach" => "reach", "reaches" => "reach",
559 "run" => "run", "run" => "run", "runs" => "run",
560 "sink" => "sink", "sink" => "sink", "sinks" => "sink",
561 "sleep" => "sleep", "sleep" => "sleep", "sleeps" => "sleep",
562 "view" => "view", "view" => "view", "views" => "view",
565 my $PL_v_ambiguous_pres = enclose join '|', keys %PL_v_ambiguous_pres;
568 my $PL_v_irregular_non_pres = enclose join '|',
570 "did", "had", "ate", "made", "put",
571 "spent", "fought", "sank", "gave", "sought",
572 "shall", "could", "ought", "should",
575 my $PL_v_ambiguous_non_pres = enclose join '|',
577 "thought", "saw", "bent", "will", "might", "cut",
580 # "..oes" -> "..oe" (the rest are "..oes" -> "o")
582 my $PL_v_oes_oe = enclose join "|",
584 .*shoes .*hoes .*toes
585 canoes floes oboes roes throes woes
588 my $PL_count_zero = enclose join '|',
590 0, "no", "zero", "nil"
593 my $PL_count_one = enclose join '|',
595 1, "a", "an", "one", "each", "every", "this", "that",
600 "a" => "some", "an" => "some",
601 "this" => "these", "that" => "those",
603 my $PL_adj_special = enclose join '|', keys %PL_adj_special;
614 my $PL_adj_poss = enclose join '|', keys %PL_adj_poss;
619 local $SIG{__WARN__} = sub {0};
620 do {$@ =~ s/at.*?$//;
621 die "\nBad user-defined singular pattern:\n\t$@\n"}
622 if (!eval "'' =~ m/$_[0]/; 1;" or $@);
631 local $SIG{__WARN__} = sub {0};
632 do {$@ =~ s/at.*?$//;
633 die "\nBad user-defined plural string: '$_[1]'\n\t$@\n"}
634 if (!eval "qq{$_[1]}; 1;" or $@);
639 my @PL_sb_user_defined = ();
640 my @PL_v_user_defined = ();
641 my @PL_adj_user_defined = ();
642 my @A_a_user_defined = ();
646 unshift @PL_sb_user_defined, checkpatsubs(@_);
652 unshift @PL_v_user_defined, checkpatsubs(@_[4,5]);
653 unshift @PL_v_user_defined, checkpatsubs(@_[2,3]);
654 unshift @PL_v_user_defined, checkpatsubs(@_[0,1]);
660 unshift @PL_adj_user_defined, checkpatsubs(@_);
666 unshift @A_a_user_defined, checkpat(@_,'a');
672 unshift @A_a_user_defined, checkpat(@_,'an');
679 for (my $i=0; $i < @_; $i+=2)
681 if ($word =~ /^(?:$_[$i])$/i)
683 last unless defined $_[$i+1];
684 return eval '"'.$_[$i+1].'"';
692 local $SIG{__WARN__} = sub {0};
# Site-wide rc file: a ".inflectrc" sitting next to the loaded module file.
# %INC keys use single forward slashes, so the key must be
# 'Lingua/EN/Inflect.pm'; the former 'Lingua//EN/Inflect.pm' never matched,
# which meant this file was silently never loaded.
require File::Spec;
$rcfile = $INC{'Lingua/EN/Inflect.pm'} || '';
# If the substitution does not apply, clear the name rather than risk
# re-executing the module file itself via "do".
$rcfile = '' unless $rcfile =~ s/Inflect\.pm$/.inflectrc/;
# "do FILE" searches @INC for relative names, whereas the -r/-s tests are
# relative to the current directory; an absolute path makes both agree.
$rcfile = File::Spec->rel2abs($rcfile) if $rcfile;
do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n"
    if $rcfile && -r $rcfile && -s $rcfile;

# Per-user rc file: only looked for when HOME is actually set, so that an
# unset HOME does not turn into a probe of "/.inflectrc".
$rcfile = (defined $ENV{HOME} && length $ENV{HOME})
        ? "$ENV{HOME}/.inflectrc"
        : '';
do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n"
    if $rcfile && -r $rcfile && -s $rcfile;
705 sub postprocess # FIX PEDANTRY AND CAPITALIZATION :-)
707 my ($orig, $inflected) = @_;
708 $inflected =~ s/([^|]+)\|(.+)/ $classical{all}?$2:$1 /e;
709 return $orig =~ /^I$/ ? $inflected
710 : $orig =~ /^[A-Z]+$/ ? uc $inflected
711 : $orig =~ /^[A-Z]/ ? ucfirst $inflected
718 my ($str, $count) = @_;
719 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
720 return $str unless $word;
721 my $plural = postprocess $word, _PL_special_adjective($word,$count)
722 || _PL_special_verb($word,$count)
723 || _PL_noun($word,$count);
724 return $pre.$plural.$post;
728 # PL_N($word,$number)
730 my ($str, $count) = @_;
731 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
732 return $str unless $word;
733 my $plural = postprocess $word, _PL_noun($word,$count);
734 return $pre.$plural.$post;
738 # PL_V($word,$number)
740 my ($str, $count) = @_;
741 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
742 return $str unless $word;
743 my $plural = postprocess $word, _PL_special_verb($word,$count)
744 || _PL_general_verb($word,$count);
745 return $pre.$plural.$post;
749 # PL_ADJ($word,$number)
751 my ($str, $count) = @_;
752 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
753 return $str unless $word;
754 my $plural = postprocess $word, _PL_special_adjective($word,$count)
756 return $pre.$plural.$post;
# Number-insensitive comparison of two words using the general pluralizer:
# forwards the caller's arguments to _PL_eq with a reference to PL appended.
sub PL_eq
{
    return _PL_eq(@_, \&PL);
}
# Number-insensitive comparison of two words treated as nouns:
# forwards the caller's arguments to _PL_eq with a reference to PL_N appended.
sub PL_N_eq
{
    return _PL_eq(@_, \&PL_N);
}
# Number-insensitive comparison of two words treated as verbs:
# forwards the caller's arguments to _PL_eq with a reference to PL_V appended.
sub PL_V_eq
{
    return _PL_eq(@_, \&PL_V);
}
# Number-insensitive comparison of two words treated as adjectives:
# forwards the caller's arguments to _PL_eq with a reference to PL_ADJ appended.
sub PL_ADJ_eq
{
    return _PL_eq(@_, \&PL_ADJ);
}
766 my ( $word1, $word2, $PL ) = @_;
767 my %classval = %classical;
768 %classical = %all_classical;
770 $result = "eq" if !$result && $word1 eq $word2;
771 $result = "p:s" if !$result && $word1 eq &$PL($word2);
772 $result = "s:p" if !$result && &$PL($word1) eq $word2;
774 $result = "p:s" if !$result && $word1 eq &$PL($word2);
775 $result = "s:p" if !$result && &$PL($word1) eq $word2;
776 %classical = %classval;
778 if ($PL == \&PL || $PL == \&PL_N)
781 if !$result && _PL_check_plurals_N($word1,$word2);
783 if !$result && _PL_check_plurals_N($word2,$word1);
785 if ($PL == \&PL || $PL == \&PL_ADJ)
788 if !$result && _PL_check_plurals_ADJ($word1,$word2,$PL);
796 $_[0] =~ /($_[1])($_[2]\|\1$_[3]|$_[3]\|\1$_[2])/
799 sub _PL_check_plurals_N
801 my $pair = "$_[0]|$_[1]";
802 foreach ( values %PL_sb_irregular_s ) { return 1 if $_ eq $pair; }
803 foreach ( values %PL_sb_irregular ) { return 1 if $_ eq $pair; }
805 return 1 if _PL_reg_plurals($pair, $PL_sb_C_a_ata, "as","ata")
806 || _PL_reg_plurals($pair, $PL_sb_C_is_ides, "is","ides")
807 || _PL_reg_plurals($pair, $PL_sb_C_a_ae, "s","e")
808 || _PL_reg_plurals($pair, $PL_sb_C_en_ina, "ens","ina")
809 || _PL_reg_plurals($pair, $PL_sb_C_um_a, "ums","a")
810 || _PL_reg_plurals($pair, $PL_sb_C_us_i, "uses","i")
811 || _PL_reg_plurals($pair, $PL_sb_C_on_a, "ons","a")
812 || _PL_reg_plurals($pair, $PL_sb_C_o_i, "os","i")
813 || _PL_reg_plurals($pair, $PL_sb_C_ex_ices, "exes","ices")
814 || _PL_reg_plurals($pair, $PL_sb_C_ix_ices, "ixes","ices")
815 || _PL_reg_plurals($pair, $PL_sb_C_i, "s","i")
816 || _PL_reg_plurals($pair, $PL_sb_C_im, "s","im")
818 || _PL_reg_plurals($pair, '.*eau', "s","x")
819 || _PL_reg_plurals($pair, '.*ieu', "s","x")
820 || _PL_reg_plurals($pair, '.*tri', "xes","ces")
821 || _PL_reg_plurals($pair, '.{2,}[yia]n', "xes","ges");
827 sub _PL_check_plurals_ADJ
829 my ( $word1a, $word2a ) = @_;
830 my ( $word1b, $word2b ) = @_;
832 $word1a = '' unless $word1a =~ s/'s?$//;
833 $word2a = '' unless $word2a =~ s/'s?$//;
834 $word1b = '' unless $word1b =~ s/s'$//;
835 $word2b = '' unless $word2b =~ s/s'$//;
839 return 1 if $word2a && ( _PL_check_plurals_N($word1a, $word2a)
840 || _PL_check_plurals_N($word2a, $word1a) );
841 return 1 if $word2b && ( _PL_check_plurals_N($word1a, $word2b)
842 || _PL_check_plurals_N($word2b, $word1a) );
846 return 1 if $word2a && ( _PL_check_plurals_N($word1b, $word2a)
847 || _PL_check_plurals_N($word2a, $word1b) );
848 return 1 if $word2b && ( _PL_check_plurals_N($word1b, $word2b)
849 || _PL_check_plurals_N($word2b, $word1b) );
858 my ( $word, $count ) = @_;
859 my $value; # UTILITY VARIABLE
863 $count = $persistent_count
864 if !defined($count) && defined($persistent_count);
866 $count = (defined $count and $count=~/^($PL_count_one)$/io
867 or defined $count and $classical{zero}
868 and $count=~/^($PL_count_zero)$/io)
872 return $word if $count==1;
874 # HANDLE USER-DEFINED NOUNS
876 return $value if defined($value = ud_match($word, @PL_sb_user_defined));
879 # HANDLE EMPTY WORD, SINGULAR COUNT AND UNINFLECTED PLURALS
881 $word eq '' and return $word;
883 $word =~ /^($PL_sb_uninflected)$/i
886 $classical{herd} and $word =~ /^($PL_sb_uninflected_herd)$/i
890 # HANDLE COMPOUNDS ("Governor General", "mother-in-law", "aide-de-camp", ETC.)
892 $word =~ /^(?:$PL_sb_postfix_adj)$/i
894 and return _PL_noun($1,2)
897 $word =~ /^(?:$PL_sb_prep_dual_compound)$/i
899 and return _PL_noun($1,2)
901 . _PL_noun($value->[1]);
903 $word =~ /^(?:$PL_sb_prep_compound)$/i
905 and return _PL_noun($1,2)
910 $word =~ /^((?:$PL_prep)\s+)($PL_pron_acc)$/i
911 and return $1.$PL_pron_acc{lc($2)};
913 $value = $PL_pron_nom{lc($word)}
916 $word =~ /^($PL_pron_acc)$/i
917 and return $PL_pron_acc{lc($1)};
919 # HANDLE ISOLATED IRREGULAR PLURALS
921 $word =~ /(.*)\b($PL_sb_irregular)$/i
922 and return $1 . $PL_sb_irregular{lc $2};
923 $word =~ /($PL_sb_U_man_mans)$/i
925 $word =~ /(\S*)(person)$/i and return $classical{persons}?"$1persons":"$1people";
927 # HANDLE FAMILIES OF IRREGULAR PLURALS
929 $word =~ /(.*)man$/i and return "$1men";
930 $word =~ /(.*[ml])ouse$/i and return "$1ice";
931 $word =~ /(.*)goose$/i and return "$1geese";
932 $word =~ /(.*)tooth$/i and return "$1teeth";
933 $word =~ /(.*)foot$/i and return "$1feet";
935 # HANDLE UNASSIMILATED IMPORTS
937 $word =~ /(.*)ceps$/i and return $word;
938 $word =~ /(.*)zoon$/i and return "$1zoa";
939 $word =~ /(.*[csx])is$/i and return "$1es";
940 $word =~ /($PL_sb_U_ex_ices)ex$/i and return "$1ices";
941 $word =~ /($PL_sb_U_ix_ices)ix$/i and return "$1ices";
942 $word =~ /($PL_sb_U_um_a)um$/i and return "$1a";
943 $word =~ /($PL_sb_U_us_i)us$/i and return "$1i";
944 $word =~ /($PL_sb_U_on_a)on$/i and return "$1a";
945 $word =~ /($PL_sb_U_a_ae)$/i and return "$1e";
947 # HANDLE INCOMPLETELY ASSIMILATED IMPORTS
949 if ($classical{ancient})
951 $word =~ /(.*)trix$/i and return "$1trices";
952 $word =~ /(.*)eau$/i and return "$1eaux";
953 $word =~ /(.*)ieu$/i and return "$1ieux";
954 $word =~ /(.{2,}[yia])nx$/i and return "$1nges";
955 $word =~ /($PL_sb_C_en_ina)en$/i and return "$1ina";
956 $word =~ /($PL_sb_C_ex_ices)ex$/i and return "$1ices";
957 $word =~ /($PL_sb_C_ix_ices)ix$/i and return "$1ices";
958 $word =~ /($PL_sb_C_um_a)um$/i and return "$1a";
959 $word =~ /($PL_sb_C_us_i)us$/i and return "$1i";
960 $word =~ /($PL_sb_C_us_us)$/i and return "$1";
961 $word =~ /($PL_sb_C_a_ae)$/i and return "$1e";
962 $word =~ /($PL_sb_C_a_ata)a$/i and return "$1ata";
963 $word =~ /($PL_sb_C_is_ides)is$/i and return "$1ides";
964 $word =~ /($PL_sb_C_o_i)o$/i and return "$1i";
965 $word =~ /($PL_sb_C_on_a)on$/i and return "$1a";
966 $word =~ /$PL_sb_C_im$/i and return "${word}im";
967 $word =~ /$PL_sb_C_i$/i and return "${word}i";
# HANDLE SINGULAR NOUNS ENDING IN ...s OR OTHER SIBILANTS
973 $word =~ /^($PL_sb_singular_s)$/i and return "$1es";
974 $word =~ /^([A-Z].*s)$/ and $classical{names} and return "$1es";
975 $word =~ /(.*)([cs]h|[zx])$/i and return "$1$2es";
976 # $word =~ /(.*)(us)$/i and return "$1$2es";
978 # HANDLE ...f -> ...ves
980 $word =~ /(.*[eao])lf$/i and return "$1lves";
981 $word =~ /(.*[^d])eaf$/i and return "$1eaves";
982 $word =~ /(.*[nlw])ife$/i and return "$1ives";
983 $word =~ /(.*)arf$/i and return "$1arves";
987 $word =~ /(.*[aeiou])y$/i and return "$1ys";
988 $word =~ /([A-Z].*y)$/ and $classical{names} and return "$1s";
989 $word =~ /(.*)y$/i and return "$1ies";
993 $word =~ /$PL_sb_U_o_os$/i and return "${word}s";
994 $word =~ /[aeiou]o$/i and return "${word}s";
995 $word =~ /o$/i and return "${word}es";
998 # OTHERWISE JUST ADD ...s
1004 sub _PL_special_verb
1006 my ( $word, $count ) = @_;
1007 $count = $persistent_count
1008 if !defined($count) && defined($persistent_count);
1009 $count = (defined $count and $count=~/^($PL_count_one)$/io or
1010 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1013 return undef if $count=~/^($PL_count_one)$/io;
1015 my $value; # UTILITY VARIABLE
1017 # HANDLE USER-DEFINED VERBS
1019 return $value if defined($value = ud_match($word, @PL_v_user_defined));
1021 # HANDLE IRREGULAR PRESENT TENSE (SIMPLE AND COMPOUND)
1023 $word =~ /^($PL_v_irregular_pres)((\s.*)?)$/i
1024 and return $PL_v_irregular_pres{lc $1}.$2;
1026 # HANDLE IRREGULAR FUTURE, PRETERITE AND PERFECT TENSES
1028 $word =~ /^($PL_v_irregular_non_pres)((\s.*)?)$/i
1031 # HANDLE PRESENT NEGATIONS (SIMPLE AND COMPOUND)
1033 $word =~ /^($PL_v_irregular_pres)(n't(\s.*)?)$/i
1034 and return $PL_v_irregular_pres{lc $1}.$2;
1036 $word =~ /^\S+n't\b/i
1039 # HANDLE SPECIAL CASES
1041 $word =~ /^($PL_v_special_s)$/ and return undef;
1042 $word =~ /\s/ and return undef;
1044 # HANDLE STANDARD 3RD PERSON (CHOP THE ...(e)s OFF SINGLE WORDS)
1046 $word =~ /^(.*)([cs]h|[x]|zz|ss)es$/i and return "$1$2";
1048 $word =~ /^(..+)ies$/i and return "$1y";
1050 $word =~ /($PL_v_oes_oe)$/ and return substr($1,0,-1);
1051 $word =~ /^(.+)oes$/i and return "$1o";
1053 $word =~ /^(.*[^s])s$/i and return $1;
1055 # OTHERWISE, A REGULAR VERB (HANDLE ELSEWHERE)
1060 sub _PL_general_verb
1062 my ( $word, $count ) = @_;
1063 $count = $persistent_count
1064 if !defined($count) && defined($persistent_count);
1065 $count = (defined $count and $count=~/^($PL_count_one)$/io or
1066 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1069 return $word if $count=~/^($PL_count_one)$/io;
1071 # HANDLE AMBIGUOUS PRESENT TENSES (SIMPLE AND COMPOUND)
1073 $word =~ /^($PL_v_ambiguous_pres)((\s.*)?)$/i
1074 and return $PL_v_ambiguous_pres{lc $1}.$2;
1076 # HANDLE AMBIGUOUS PRETERITE AND PERFECT TENSES
1078 $word =~ /^($PL_v_ambiguous_non_pres)((\s.*)?)$/i
1081 # OTHERWISE, 1st OR 2ND PERSON IS UNINFLECTED
1087 sub _PL_special_adjective
1089 my ( $word, $count ) = @_;
1090 $count = $persistent_count
1091 if !defined($count) && defined($persistent_count);
1092 $count = (defined $count and $count=~/^($PL_count_one)$/io or
1093 defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1096 return $word if $count=~/^($PL_count_one)$/io;
1099 # HANDLE USER-DEFINED ADJECTIVES
1102 return $value if defined($value = ud_match($word, @PL_adj_user_defined));
1104 # HANDLE KNOWN CASES
1106 $word =~ /^($PL_adj_special)$/i
1107 and return $PL_adj_special{lc $1};
1109 # HANDLE POSSESSIVES
1111 $word =~ /^($PL_adj_poss)$/i
1112 and return $PL_adj_poss{lc $1};
1114 $word =~ /^(.*)'s?$/ and do { my $pl = PL_N($1);
1115 return "$pl'" . ($pl =~ m/s$/ ? "" : "s");
1118 # OTHERWISE, NO IDEA
1125 # 2. INDEFINITE ARTICLES
1127 # THIS PATTERN MATCHES STRINGS OF CAPITALS STARTING WITH A "VOWEL-SOUND"
1128 # CONSONANT FOLLOWED BY ANOTHER CONSONANT, AND WHICH ARE NOT LIKELY
1129 # TO BE REAL WORDS (OH, ALL RIGHT THEN, IT'S JUST MAGIC!)
1132 (?! FJO | [HLMNS]Y. | RY[EO] | SQU
1133 | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
# THIS PATTERN CODES THE BEGINNINGS OF ALL ENGLISH WORDS BEGINNING WITH A
1138 # 'y' FOLLOWED BY A CONSONANT. ANY OTHER Y-CONSONANT PREFIX THEREFORE
1139 # IMPLIES AN ABBREVIATION.
1141 my $A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)';
1143 # EXCEPTIONS TO EXCEPTIONS
1145 my $A_explicit_an = enclose join '|',
1148 "hour(?!i)", "heir", "honest", "hono",
1153 my ($str, $count) = @_;
1154 my ($pre, $word, $post) = ( $str =~ m/\A(\s*)(?:an?\s+)?(.+?)(\s*)\Z/i );
1155 return $str unless $word;
1156 my $result = _indef_article($word,$count);
1157 return $pre.$result.$post;
1164 my ( $word, $count ) = @_;
1166 $count = $persistent_count
1167 if !defined($count) && defined($persistent_count);
1169 return "$count $word"
1170 if defined $count && $count!~/^($PL_count_one)$/io;
1172 # HANDLE USER-DEFINED VARIANTS
1175 return $value if defined($value = ud_match($word, @A_a_user_defined));
1177 # HANDLE SPECIAL CASES
1179 $word =~ /^($A_explicit_an)/i and return "an $word";
1181 # HANDLE ABBREVIATIONS
1183 $word =~ /^($A_abbrev)/ox and return "an $word";
1184 $word =~ /^[aefhilmnorsx][.-]/i and return "an $word";
1185 $word =~ /^[a-z][.-]/i and return "a $word";
1189 $word =~ /^[^aeiouy]/i and return "a $word";
1191 # HANDLE SPECIAL VOWEL-FORMS
1193 $word =~ /^e[uw]/i and return "a $word";
1194 $word =~ /^onc?e\b/i and return "a $word";
1195 $word =~ /^uni([^nmd]|mo)/i and return "a $word";
1196 $word =~ /^u[bcfhjkqrst][aeiou]/i and return "a $word";
1198 # HANDLE SPECIAL CAPITALS
1200 $word =~ /^U[NK][AIEO]?/ and return "a $word";
1204 $word =~ /^[aeiou]/i and return "an $word";
1206 # HANDLE y... (BEFORE CERTAIN CONSONANTS IMPLIES (UNNATURALIZED) "i.." SOUND)
1208 $word =~ /^($A_y_cons)/io and return "an $word";
1210 # OTHERWISE, GUESS "a"
# 3. TRANSLATE ZERO-QUANTIFIED $word TO "no PL($word)"
1218 my ($str, $count) = @_;
1219 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
1221 $count = $persistent_count
1222 if !defined($count) && defined($persistent_count);
1223 $count = 0 unless $count;
1225 return "$pre$count " . PL($word,$count) . $post
1226 unless $count =~ /^$PL_count_zero$/;
1227 return "${pre}no ". PL($word,0) . $post ;
1235 local $_ = PL_V(shift,2);
1243 or s/([^aeiou][aeiouy]([bdgmnprst]))$/$1$2/;
1249 # NUMERICAL INFLECTIONS
1270 @ordinal{qw(ty one two three five eight nine twelve )}=
1271 qw(tieth first second third fifth eighth ninth twelfth);
1273 my $ordinal_suff = join '|', keys %ordinal, "";
1275 $ordinal{""} = 'th';
1281 return $num . ($nth{$num%100} || $nth{$num%10});
1284 $num =~ s/($ordinal_suff)\Z/$ordinal{$1}/;
1297 'decimal' => 'point',
# Word tables used when spelling out numbers.
# @unit is indexed by a digit 0-9 (0 maps to the empty string).
my @unit = ('', qw(one two three four five six seven eight nine));
# @teen is indexed by the units digit of a number in the range 10-19.
my @teen = qw(ten eleven twelve thirteen fourteen
              fifteen sixteen seventeen eighteen nineteen);
# @ten is indexed by the tens digit; slots 0 and 1 are empty strings.
my @ten  = ('', '', qw(twenty thirty forty fifty sixty seventy eighty ninety));
# @mill holds the magnitude word for each group of three digits.  Every
# entry carries a leading space (so index 0 is a single space), and "_"
# is shorthand for "illion".
my @mill = map { (my $val = $_) =~ s/_/illion/; " $val" }
           ('', qw(thousand m_ b_ tr_ quadr_ quint_ sext_ sept_ oct_ non_ dec_));

# mill($index)
#   Returns the magnitude word (with its leading space) for the given
#   three-digit group index; an undefined or false index is treated as 0.
#   Dies with "Number out of range\n" when the index is beyond the table.
#   (The former ' ???illion' fallback could never be reached, because the
#   range check already rejects every index it would have applied to.)
sub mill
{
    my $ind = $_[0] || 0;
    die "Number out of range\n" if $ind > $#mill;
    return $mill[$ind];
}

# unit($digit, $mill_index)
#   Returns the word for a single digit followed by its magnitude word.
sub unit { return $unit[$_[0]] . mill($_[1]); }
1316 return $ten[$_[0]] . ($_[0]&&$_[1]?'-':'') . $unit[$_[1]] . mill($_[2])
1318 return $teen[$_[1]]. $mill[$_[2]||0];
1323 return unit($_[0]) . " hundred" . ($_[1] || $_[2] ? " $_[4] " : '')
1324 . ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[0];
1325 return ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[1] || $_[2];
1332 my ($num,$group,$zero,$one,$comma,$and) = @_;
1336 $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /eg;
1340 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /eg;
1341 $num =~ s/(\d)/ ($1 ? unit($1) :" $zero")."$comma " /e;
1345 $num =~ s/(\d)(\d)(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")." ".($2 ? ten($2,$3) : $3 ? " $zero " . unit($3) : " $zero $zero") . "$comma " /eg;
1346 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /e;
1347 $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /e;
1356 $num =~ s/\A\s*0+//;
1358 1 while $num =~ s/(\d)(\d)(\d)(?=\D*\Z)/ hund($1,$2,$3,$mill++,$and) /e;
1359 $num =~ s/(\d)(\d)(?=\D*\Z)/ ten($1,$2,$mill)."$comma " /e;
1360 $num =~ s/(\d)(?=\D*\Z)/ unit($1,$mill) . "$comma "/e;
1368 my %arg = ( %default_args, @_ );
1369 my $group = $arg{group};
1371 die "Bad chunking option: $group\n" unless $group =~ /\A[0-3]\Z/;
1372 my $sign = ($num =~ /\A\s*\+/) ? "plus"
1373 : ($num =~ /\A\s*\-/) ? "minus"
1376 my ($zero, $one) = @arg{'zero','one'};
1377 my $comma = $arg{comma};
1378 my $and = $arg{'and'};
1380 my $ord = $num =~ s/(st|nd|rd|th)\Z//;
1381 my @chunks = ($arg{decimal})
1382 ? $group ? split(/\./, $num) : split(/\./, $num, 2)
1387 if ($chunks[0] eq '') { $first=0; shift @chunks; }
1394 if (!$group && !$first) { $_ = enword($_,1,$zero,$one,$comma,$and) }
1395 else { $_ = enword($_,$group,$zero,$one,$comma,$and) }
1399 s/, (\S+)\s+\Z/ $and $1/ if !$group and $first;
1402 $first = '' if $first;
1408 unshift @chunks, '';
1412 @numchunks = split /\Q$comma /, $chunks[0];
1415 $numchunks[-1] =~ s/($ordinal_suff)\Z/$ordinal{$1}/
1416 if $ord and @numchunks;
1418 foreach (@chunks[1..$#chunks])
1420 push @numchunks, $arg{decimal};
1421 push @numchunks, split /\Q$comma /;
1426 unshift @numchunks, $sign if $sign;
1431 return ($sign?"$sign ":'') . join ", ", @numchunks;
1435 $num = ($sign?"$sign ":'') . shift @numchunks;
1436 $first = ($num !~ /$arg{decimal}\Z/);
1437 foreach ( @numchunks )
1439 if (/\A$arg{decimal}\Z/)
1446 $num .= "$comma $_";
1463 Lingua::EN::Inflect - Convert singular to plural. Select "a" or "an".
1467 This document describes version 1.86 of Lingua::EN::Inflect,
1468 released October 20, 2000.
1472 use Lingua::EN::Inflect qw ( PL PL_N PL_V PL_ADJ NO NUM
1473 PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
1478 def_noun def_verb def_adj def_a def_an );
1481 # UNCONDITIONALLY FORM THE PLURAL
1483 print "The plural of ", $word, " is ", PL($word), "\n";
1486 # CONDITIONALLY FORM THE PLURAL
1488 print "I saw $cat_count ", PL("cat",$cat_count), "\n";
1491 # FORM PLURALS FOR SPECIFIC PARTS OF SPEECH
1493 print PL_N("I",$N1), PL_V("saw",$N1),
1494 PL_ADJ("my",$N2), PL_N("saw",$N2), "\n";
1497 # DEAL WITH "0/1/N" -> "no/1/N" TRANSLATION:
1499 print "There ", PL_V("was",$errors), NO(" error",$errors), "\n";
1502 # USE DEFAULT COUNTS:
1504 print NUM($N1,""), PL("I"), PL_V(" saw"), NUM($N2), PL_N(" saw");
1505 print "There ", NUM($errors,''), PL_V("was"), NO(" error"), "\n";
1508 # COMPARE TWO WORDS "NUMBER-INSENSITIVELY":
1510 print "same\n" if PL_eq($word1, $word2);
1511 print "same noun\n" if PL_N_eq($word1, $word2);
1512 print "same verb\n" if PL_V_eq($word1, $word2);
1513 print "same adj.\n" if PL_ADJ_eq($word1, $word2);
1516 # ADD CORRECT "a" OR "an" FOR A GIVEN WORD:
1518 print "Did you want ", A($thing), " or ", AN($idea), "\n";
1521 # CONVERT NUMERALS INTO ORDINALS (i.e. 1->1st, 2->2nd, 3->3rd, etc.)
1523 print "It was ", ORD($position), " from the left\n";
1525 # CONVERT NUMERALS TO WORDS (i.e. 1->"one", 101->"one hundred and one", etc.)
1526 # IN A SCALAR CONTEXT: GET BACK A SINGLE STRING...
1528 $words = NUMWORDS(1234); # "one thousand, two hundred and thirty-four"
1529 $words = NUMWORDS(ORD(1234)); # "one thousand, two hundred and thirty-fourth"
1532 # IN A LIST CONTEXT: GET BACK A LIST OF STRINGS, ONE FOR EACH "CHUNK"...
1534 @words = NUMWORDS(1234); # ("one thousand","two hundred and thirty-four")
1537 # OPTIONAL PARAMETERS CHANGE TRANSLATION:
1539 $words = NUMWORDS(12345, group=>1);
1540 # "one, two, three, four, five"
1542 $words = NUMWORDS(12345, group=>2);
1543 # "twelve, thirty-four, five"
1545 $words = NUMWORDS(12345, group=>3);
1546 # "one twenty-three, forty-five"
1548 $words = NUMWORDS(1234, 'and'=>'');
1549 # "one thousand, two hundred thirty-four"
1551 $words = NUMWORDS(1234, 'and'=>', plus');
1552 # "one thousand, two hundred, plus thirty-four"
1554 $words = NUMWORDS(555_1202, group=>1, zero=>'oh');
1555 # "five, five, five, one, two, oh, two"
1557 $words = NUMWORDS(555_1202, group=>1, one=>'unity');
1558 # "five, five, five, unity, two, oh, two"
1560 $words = NUMWORDS(123.456, group=>1, decimal=>'mark');
1561 # "one two three mark four five six"
1564 # REQUIRE "CLASSICAL" PLURALS (EG: "focus"->"foci", "cherub"->"cherubim")
1566 classical; # USE ALL CLASSICAL PLURALS
1568 classical 1; # USE ALL CLASSICAL PLURALS
1569 classical 0; # USE ALL MODERN PLURALS (DEFAULT)
1571 classical 'zero'; # "no error" INSTEAD OF "no errors"
1572 classical zero=>1; # "no error" INSTEAD OF "no errors"
1573 classical zero=>0; # "no errors" INSTEAD OF "no error"
1575 classical 'herd'; # "2 buffalo" INSTEAD OF "2 buffalos"
1576 classical herd=>1; # "2 buffalo" INSTEAD OF "2 buffalos"
1577 classical herd=>0; # "2 buffalos" INSTEAD OF "2 buffalo"
1579 classical 'persons'; # "2 chairpersons" INSTEAD OF "2 chairpeople"
1580 classical persons=>1; # "2 chairpersons" INSTEAD OF "2 chairpeople"
1581 classical persons=>0; # "2 chairpeople" INSTEAD OF "2 chairpersons"
1583 classical 'ancient'; # "2 formulae" INSTEAD OF "2 formulas"
1584 classical ancient=>1; # "2 formulae" INSTEAD OF "2 formulas"
1585 classical ancient=>0; # "2 formulas" INSTEAD OF "2 formulae"
1589 # INTERPOLATE "PL()", "PL_N()", "PL_V()", "PL_ADJ()", "A()", "AN()"
1590 # "NUM()" AND "ORD()" WITHIN STRINGS:
1592 print inflect("The plural of $word is PL($word)\n");
1593 print inflect("I saw $cat_count PL(cat,$cat_count)\n");
1594 print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)");
1595 print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)");
1596 print inflect("I saw NUM($cat_count) PL(cat)\nNUM()");
1597 print inflect("There PL_V(was,$errors) NO(error,$errors)\n");
1598 print inflect("There NUM($errors,) PL_V(was) NO(error)\n");
1599 print inflect("Did you want A($thing) or AN($idea)\n");
1600 print inflect("It was ORD($position) from the left\n");
1603 # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES):
1605 def_noun "VAX" => "VAXen"; # SINGULAR => PLURAL
1607 def_verb "will" => "shall", # 1ST PERSON SINGULAR => PLURAL
1608 "will" => "will", # 2ND PERSON SINGULAR => PLURAL
1609 "will" => "will", # 3RD PERSON SINGULAR => PLURAL
1611 def_adj "hir" => "their", # SINGULAR => PLURAL
1613 def_a "h" # "AY HALWAYS SEZ 'HAITCH'!"
1615 def_an "horrendous.*" # "AN HORRENDOUS AFFECTATION"
1620 The exportable subroutines of Lingua::EN::Inflect provide plural
1621 inflections, "a"/"an" selection for English words, and manipulation
1624 Plural forms of all nouns, most verbs, and some adjectives are
1625 provided. Where appropriate, "classical" variants (for example: "brother" ->
1626 "brethren", "dogma" -> "dogmata", etc.) are also provided.
1628 Pronunciation-based "a"/"an" selection is provided for all English
1629 words, and most initialisms.
1631 It is also possible to inflect numerals (1,2,3) to ordinals (1st, 2nd, 3rd)
1632 and to English words ("one", "two", "three").
1634 In generating these inflections, Lingua::EN::Inflect follows the Oxford
1635 English Dictionary and the guidelines in Fowler's Modern English
1636 Usage, preferring the former where the two disagree.
1638 The module is built around standard British spelling, but is designed
1639 to cope with common American variants as well. Slang, jargon, and
1640 other English dialects are I<not> explicitly catered for.
1642 Where two or more inflected forms exist for a single word (typically a
1643 "classical" form and a "modern" form), Lingua::EN::Inflect prefers the
1644 more common form (typically the "modern" one), unless "classical"
1645 processing has been specified
1646 (see L<"MODERN VS CLASSICAL INFLECTIONS">).
1648 =head1 FORMING PLURALS
1650 =head2 Inflecting Plurals
1652 All of the C<PL_...> plural inflection subroutines take the word to be
1653 inflected as their first argument and return the corresponding inflection.
1654 Note that all such subroutines expect the I<singular> form of the word. The
1655 results of passing a plural form are undefined (and unlikely to be correct).
1657 The C<PL_...> subroutines also take an optional second argument,
1658 which indicates the grammatical "number" of the word (or of another word
1659 with which the word being inflected must agree). If the "number" argument is
1660 supplied and is not C<1> (or C<"one"> or C<"a">, or some other adjective that
1661 implies the singular), the plural form of the word is returned. If the
1662 "number" argument I<does> indicate singularity, the (uninflected) word
1663 itself is returned. If the number argument is omitted, the plural form
1664 is returned unconditionally.
1666 The various subroutines are:
1672 The exportable subroutine C<PL_N()> takes a I<singular> English noun or
1673 pronoun and returns its plural. Pronouns in the nominative ("I" ->
1674 "we") and accusative ("me" -> "us") cases are handled, as are
1675 possessive pronouns ("mine" -> "ours").
1680 The exportable subroutine C<PL_V()> takes the I<singular> form of a
1681 conjugated verb (that is, one which is already in the correct "person"
1682 and "mood") and returns the corresponding plural conjugation.
1685 =item C<PL_ADJ($;$)>
1687 The exportable subroutine C<PL_ADJ()> takes the I<singular> form of
1688 certain types of adjectives and returns the corresponding plural form.
1689 Adjectives that are correctly handled include: "numerical" adjectives
1690 ("a" -> "some"), demonstrative adjectives ("this" -> "these", "that" ->
1691 "those"), and possessives ("my" -> "our", "cat's" -> "cats'", "child's"
1692 -> "childrens'", etc.)
1697 The exportable subroutine C<PL()> takes a I<singular> English noun,
1698 pronoun, verb, or adjective and returns its plural form. Where a word
1699 has more than one inflection depending on its part of speech (for
1700 example, the noun "thought" inflects to "thoughts", the verb "thought"
1701 to "thought"), the (singular) noun sense is preferred to the (singular)
1704 Hence C<PL("knife")> will return "knives" ("knife" having been treated
1705 as a singular noun), whereas C<PL("knifes")> will return "knife"
1706 ("knifes" having been treated as a 3rd person singular verb).
1708 The inherent ambiguity of such cases suggests that,
1709 where the part of speech is known, C<PL_N>, C<PL_V>, and
1710 C<PL_ADJ> should be used in preference to C<PL>.
1714 Note that all these subroutines ignore any whitespace surrounding the
1715 word being inflected, but preserve that whitespace when the result is
1716 returned. For example, C<S<PL(" cat ")>> returns S<" cats ">.
1719 =head2 Numbered plurals
1721 The C<PL_...> subroutines return only the inflected word, not the count that
1722 was used to inflect it. Thus, in order to produce "I saw 3 ducks", it
1723 is necessary to use:
1725 print "I saw $N ", PL_N($animal,$N), "\n";
1727 Since the usual purpose of producing a plural is to make it agree with
1728 a preceding count, Lingua::EN::Inflect provides an exportable subroutine
1729 (C<NO($;$)>) which, given a word and a(n optional) count, returns the
1730 count followed by the correctly inflected word. Hence the previous
1731 example can be rewritten:
1733 print "I saw ", NO($animal,$N), "\n";
1735 In addition, if the count is zero (or some other term which implies
1736 zero, such as C<"zero">, C<"nil">, etc.) the count is replaced by the
1737 word "no". Hence, if C<$N> had the value zero, the previous example
1738 would print the somewhat more elegant:
1746 Note that the name of the subroutine is a pun: the subroutine
1747 returns either a number (a I<No.>) or a C<"no">, in front of the
1751 =head2 Reducing the number of counts required
1753 In some contexts, the need to supply an explicit count to the various
1754 C<PL_...> subroutines makes for tiresome repetition. For example:
1756 print PL_ADJ("This",$errors), PL_N(" error",$errors),
1757 PL_V(" was",$errors), " fatal.\n";
1759 Lingua::EN::Inflect therefore provides an exportable subroutine
1760 (C<NUM($;$)>) which may be used to set a persistent "default number"
1761 value. If such a value is set, it is subsequently used whenever an
1762 optional second "number" argument is omitted. The default value thus set
1763 can subsequently be removed by calling C<NUM()> with no arguments.
1764 Hence we could rewrite the previous example:
1767 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n";
1770 Normally, C<NUM()> returns its first argument, so that it may also
1771 be "inlined" in contexts like:
1773 print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
1774 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n";
1777 However, in certain contexts (see L<"INTERPOLATING INFLECTIONS IN STRINGS">)
1778 it is preferable that C<NUM()> return an empty string. Hence C<NUM()>
1779 provides an optional second argument. If that argument is supplied (that is, if
1780 it is defined) and evaluates to false, C<NUM> returns an empty string
1781 instead of its first argument. For example:
1783 print NUM($errors,0), NO("error"), PL_V(" was"), " detected.\n";
1784 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n";
1789 =head2 Number-insensitive equality
1791 Lingua::EN::Inflect also provides a solution to the problem
1792 of comparing words of differing plurality through the exportable subroutines
1793 C<PL_eq($$)>, C<PL_N_eq($$)>, C<PL_V_eq($$)>, and C<PL_ADJ_eq($$)>.
1794 Each of these subroutines takes two strings, and compares them
1795 using the corresponding plural-inflection subroutine (C<PL()>, C<PL_N()>,
1796 C<PL_V()>, and C<PL_ADJ()> respectively).
1798 The comparison returns true if:
1804 the strings are C<eq>-equal, or
1808 one string is C<eq>-equal to a plural form of the other, or
1812 the strings are two different plural forms of the one word.
1816 Hence all of the following return true:
1818 PL_eq("index","index") # RETURNS "eq"
1819 PL_eq("index","indexes") # RETURNS "s:p"
1820 PL_eq("index","indices") # RETURNS "s:p"
1821 PL_eq("indexes","index") # RETURNS "p:s"
1822 PL_eq("indices","index") # RETURNS "p:s"
1823 PL_eq("indices","indexes") # RETURNS "p:p"
1824 PL_eq("indexes","indices") # RETURNS "p:p"
1825 PL_eq("indices","indices") # RETURNS "eq"
1827 As indicated by the comments in the previous example, the actual value
1828 returned by C<PL_eq> and the C<PL_..._eq> subroutines encodes which of the
1829 three equality rules succeeded: "eq" is returned if the strings were
1830 identical, "s:p" if the strings were singular and plural respectively,
1831 "p:s" for plural and singular, and "p:p" for two distinct plurals.
1832 Inequality is indicated by returning an empty string.
1834 It should be noted that two distinct singular words which happen to take
1835 the same plural form are I<not> considered equal, nor are cases where
1836 one (singular) word's plural is the other (plural) word's singular.
1837 Hence all of the following return false:
1839 PL_eq("base","basis") # ALTHOUGH BOTH -> "bases"
1840 PL_eq("syrinx","syringe") # ALTHOUGH BOTH -> "syringes"
1841 PL_eq("she","he") # ALTHOUGH BOTH -> "they"
1843 PL_eq("opus","operas") # ALTHOUGH "opus" -> "opera" -> "operas"
1844 PL_eq("taxi","taxes") # ALTHOUGH "taxi" -> "taxis" -> "taxes"
1846 Note too that, although the comparison is "number-insensitive" it is I<not>
1847 case-insensitive (that is, C<PL_eq("time","Times")> returns false). To obtain
1848 both number and case insensitivity, prefix both arguments with C<lc>
1849 (that is, C<PL_eq(lc "time", lc "Times")> returns true).
1852 =head1 OTHER VERB FORMS
1854 =head2 Present participles
1856 C<Lingua::EN::Inflect> also provides the C<PART_PRES> subroutine,
1857 which can take a 3rd person singular verb and
1858 correctly inflect it to its present participle:
1860 PART_PRES("runs") # "running"
1861 PART_PRES("loves") # "loving"
1862 PART_PRES("eats") # "eating"
1863 PART_PRES("bats") # "batting"
1864 PART_PRES("spies") # "spying"
1867 =head1 PROVIDING INDEFINITE ARTICLES
1869 =head2 Selecting indefinite articles
1871 Lingua::EN::Inflect provides two exportable subroutines (C<A($;$)> and
1872 C<AN($;$)>) which will correctly prepend the appropriate indefinite
1873 article to a word, depending on its pronunciation. For example:
1875 A("cat") # -> "a cat"
1876 AN("cat") # -> "a cat"
1877 A("euphemism") # -> "a euphemism"
1878 A("Euler number") # -> "an Euler number"
1879 A("hour") # -> "an hour"
1880 A("houri") # -> "a houri"
1882 The two subroutines are I<identical> in function and may be used
1883 interchangeably. The only reason that two versions are provided is to
1884 enhance the readability of code such as:
1886 print "That is ", AN($errortype), " error\n";
1887 print "That is ", A($fataltype), " fatal error\n";
1889 Note that in both cases the actual article provided depends I<only> on
1890 the pronunciation of the first argument, I<not> on the name of the
1893 C<A()> and C<AN()> will ignore any indefinite article that already
1894 exists at the start of the string. Thus:
1903 print A($_), "\n" for @half_arked;
1912 C<A()> and C<AN()> both take an optional second argument. As with the
1913 C<PL_...> subroutines, this second argument is a "number" specifier. If
1914 its value is C<1> (or some other value implying singularity), C<A()> and
1915 C<AN()> insert "a" or "an" as appropriate. If the number specifier
1916 implies plurality, C<A()> and C<AN()> insert the actual second argument instead.
1919 A("cat",1) # -> "a cat"
1920 A("cat",2) # -> "2 cat"
1921 A("cat","one") # -> "one cat"
1922 A("cat","no") # -> "no cat"
1924 Note that, as implied by the previous examples, C<A()> and
1925 C<AN()> both assume that their job is merely to provide the correct
1926 qualifier for a word (that is: "a", "an", or the specified count).
1927 In other words, they assume that the word they are given has
1928 already been correctly inflected for plurality. Hence, if C<$N>
1929 has the value 2, then:
1933 prints "2 cat", instead of "2 cats". The correct approach is to use:
1935 print A(PL("cat",$N),$N);
1941 Note too that, like the various C<PL_...> subroutines, whenever C<A()>
1942 and C<AN()> are called with only one argument they are subject to the
1943 effects of any preceding call to C<NUM()>. Hence, another possible
1950 =head2 Indefinite articles and initialisms
1952 "Initialisms" (sometimes inaccurately called "acronyms") are terms which
1953 have been formed from the initial letters of words in a phrase (for
1954 example, "NATO", "NBL", "S.O.S.", "SCUBA", etc.)
1956 Such terms present a particular challenge when selecting between "a"
1957 and "an", since they are sometimes pronounced as if they were a single
1958 word ("nay-tow", "sku-ba") and sometimes as a series of letter names
1959 ("en-eff-ell", "ess-oh-ess").
1961 C<A()> and C<AN()> cope with this dichotomy using a series of inbuilt
1962 rules, which may be summarized as:
1968 If the word starts with a single letter, followed by a period or dash
1969 (for example, "R.I.P.", "C.O.D.", "e-mail", "X-ray", "T-square"), then
1970 choose the appropriate article for the I<sound> of the first letter
1971 ("an R.I.P.", "a C.O.D.", "an e-mail", "an X-ray", "a T-square").
1975 If the first two letters of the word are capitals,
1976 consonants, and do not appear at the start of any known English word,
1977 (for example, "LCD", "XML", "YWCA"), then once again choose "a" or
1978 "an" depending on the I<sound> of the first letter ("an LCD", "an
1983 Otherwise, assume the string is a capitalized word or a
1984 pronounceable initialism (for example, "LED", "OPEC", "FAQ", "UNESCO"), and
1985 therefore takes "a" or "an" according to the (apparent) pronunciation of
1986 the entire word ("a LED", "an OPEC", "a FAQ", "a UNESCO").
1990 Note that rules 1 and 3 together imply that the presence or absence of
1991 punctuation may change the selection of indefinite article for a
1992 particular initialism (for example, "a FAQ" but "an F.A.Q.").
1995 =head2 Indefinite articles and "soft H's"
1997 Words beginning in the letter 'H' present another type of difficulty
1998 when selecting a suitable indefinite article. In a few such words
1999 (for example, "hour", "honour", "heir") the 'H' is not voiced at
2000 all, and so such words inflect with "an". The remaining cases
2001 ("voiced H's") may be divided into two categories:
2002 "hard H's" (such as "hangman", "holograph", "hat", etc.) and
2003 "soft H's" (such as "hysterical", "horrendous", "holy", etc.)
2005 Hard H's always take "a" as their indefinite article, and soft
2006 H's normally do so as well. But I<some> English speakers prefer
2007 "an" for soft H's (although the practice is now generally considered an
2008 affectation, rather than a legitimate grammatical alternative).
2010 At present, the C<A()> and C<AN()> subroutines ignore soft H's and use
2011 "a" for any voiced 'H'. The author would, however, welcome feedback on
2012 this decision (envisaging a possible future "soft H" mode).
2015 =head1 INFLECTING ORDINALS
2017 Occasionally it is useful to present an integer value as an ordinal
2018 rather than as a numeral. For example:
2020 Enter password (1st attempt): ********
2021 Enter password (2nd attempt): *********
2022 Enter password (3rd attempt): *********
2023 No 4th attempt. Access denied.
2025 To this end, Lingua::EN::Inflect provides the C<ORD()> subroutine.
2026 C<ORD()> takes a single argument and forms its ordinal equivalent.
2027 If the argument isn't a numerical integer, it just adds "-th".
2030 =head1 CONVERTING NUMBERS TO WORDS
2032 The exportable subroutine C<NUMWORDS> takes a number (cardinal or ordinal)
2033 and returns an English representation of that number. In a scalar context
2034 a string is returned. Hence:
2036 use Lingua::EN::Inflect qw( NUMWORDS );
2038 $words = NUMWORDS(1234567);
2042 "one million, two hundred and thirty-four thousand, five hundred and sixty-seven"
2046 In a list context each comma-separated chunk is returned as a separate element.
2049 @words = NUMWORDS(1234567);
2054 "two hundred and thirty-four thousand",
2055 "five hundred and sixty-seven")
2059 Non-digits (apart from an optional leading plus or minus sign,
2060 any decimal points, and ordinal suffixes -- see below) are silently
2061 ignored, so the following all produce identical results:
2064 NUMWORDS(5_551_202);
2065 NUMWORDS("5,551,202");
2066 NUMWORDS("555-1202");
2068 That last case is a little awkward since it's almost certainly a phone number,
2069 and "five million, five hundred and fifty-one thousand, two hundred and two"
2070 probably isn't what's wanted.
2072 To overcome this, C<NUMWORDS()> takes an optional named argument, 'group',
2073 which changes how numbers are translated. The argument must be a
2074 positive integer less than four, which indicates how the digits of the
2075 number are to be grouped. If the argument is C<1>, then each digit is
2076 translated separately. If the argument is C<2>, pairs of digits
2077 (starting from the I<left>) are grouped together. If the argument is
2078 C<3>, triples of numbers (again, from the I<left>) are grouped. Hence:
2080 NUMWORDS("555-1202", group=>1)
2082 returns C<"five, five, five, one, two, zero, two">, whilst:
2084 NUMWORDS("555-1202", group=>2)
2086 returns C<"fifty-five, fifty-one, twenty, two">, and:
2088 NUMWORDS("555-1202", group=>3)
2090 returns C<"five fifty-five, one twenty, two">.
2092 Phone numbers are often written in words as
2093 C<"five..five..five..one..two..zero..two">, which is also easy to
2096 join '..', NUMWORDS("555-1202", group=>1)
2098 C<NUMWORDS> also handles decimal fractions. Hence:
2102 returns C<"one point two three four five"> in a scalar context
2103 and C<("one","point","two","three","four","five")> in a list context.
2104 Exponent form (C<"1.234e56">) is not yet handled.
2106 Multiple decimal points are only translated in one of the "grouping" modes.
2109 NUMWORDS(101.202.303)
2111 returns C<"one hundred and one point two zero two three zero three">,
2114 NUMWORDS(101.202.303, group=>1)
2116 returns C<"one zero one point two zero two point three zero three">.
2118 The digit C<'0'> is unusual in that it may be translated to English as "zero",
2119 "oh", or "nought". To cater for this diversity, C<NUMWORDS> may be passed
2120 a named argument, 'zero', which may be set to
2121 the desired translation of C<'0'>. For example:
2123 print join "..", NUMWORDS("555-1202", group=>1, zero=>'oh')
2125 prints C<"five..five..five..one..two..oh..two">.
2126 By default, zero is rendered as "zero".
2128 Likewise, the digit C<'1'> may be rendered as "one" or "a/an" (or very
2129 occasionally other variants), depending on the context. So there is a
2130 C<'one'> argument as well:
2132 print NUMWORDS($_, one=>'a solitary', zero=>'no more'),
2133 PL(" bottle of beer on the wall\n", $_)
2137 # three bottles of beer on the wall
2138 # two bottles of beer on the wall
2139 # a solitary bottle of beer on the wall
2140 # no more bottles of beer on the wall
2142 Care is needed if the word "a/an" is to be used as a C<'one'> value.
2143 Unless the next word is known in advance, it's almost always necessary
2144 to use the C<A> function as well:
2146 print A( NUMWORDS(1, one=>'a') . " $_\n")
2147 for qw(cat aardvark ewe hour);
2155 Another major regional variation in number translation is the use of
2156 "and" in certain contexts. The named argument 'and'
2157 allows the programmer to specify how "and" should be handled. Hence:
2159 print scalar NUMWORDS("765", 'and'=>'')
2161 prints "seven hundred sixty-five", instead of "seven hundred and sixty-five".
2162 By default, the "and" is included.
2164 The translation of the decimal point is also subject to variation
2165 (with "point", "dot", and "decimal" being the favorites).
2166 The named argument 'decimal' allows the
2167 programmer to specify how the decimal point should be rendered. Hence:
2169 print scalar NUMWORDS("666.124.64.101", group=>3, decimal=>'dot')
2171 prints "six sixty-six, dot, one twenty-four, dot, sixty-four, dot, one zero one"
2172 By default, the decimal point is rendered as "point".
2174 C<NUMWORDS> also handles the ordinal forms of numbers. So:
2176 print scalar NUMWORDS('1st');
2177 print scalar NUMWORDS('3rd');
2178 print scalar NUMWORDS('202nd');
2179 print scalar NUMWORDS('1000000th');
2185 two hundred and second
2188 Two common idioms in this regard are:
2190 print scalar NUMWORDS(ORD($number));
2194 print scalar ORD(NUMWORDS($number));
2196 These are identical in effect, except when $number contains a decimal:
2199 print scalar NUMWORDS(ORD($number)); # ninety-ninth point zero nine
2200 print scalar ORD(NUMWORDS($number)); # ninety-nine point zero ninth
2202 Use whichever you feel is most appropriate.
2205 =head1 INTERPOLATING INFLECTIONS IN STRINGS
2207 By far the commonest use of the inflection subroutines is to
2208 produce message strings for various purposes. For example:
2210 print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
2211 print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), " fatal.\n";
2214 Unfortunately the need to separate each subroutine call detracts
2215 significantly from the readability of the resulting code. To ameliorate
2216 this problem, Lingua::EN::Inflect provides an exportable string-interpolating
2217 subroutine (C<inflect($)>), which recognizes calls to the various inflection
2218 subroutines within a string and interpolates them appropriately.
2220 Using C<inflect> the previous example could be rewritten:
2222 print inflect "NUM($errors) PL_N(error) PL_V(was) detected.\n";
2223 print inflect "PL_ADJ(This) PL_N(error) PL_V(was) fatal.\n";
2226 Note that C<inflect> also correctly handles calls to the C<NUM()> subroutine
2227 (whether interpolated or antecedent). The C<inflect()> subroutine has
2228 a related extra feature, in that it I<automatically> cancels any "default
2229 number" value before it returns its interpolated string. This means that
2230 calls to C<NUM()> which are embedded in an C<inflect()>-interpolated
2231 string do not "escape" and interfere with subsequent inflections.
2234 =head1 MODERN VS CLASSICAL INFLECTIONS
2236 Certain words, mainly of Latin or Ancient Greek origin, can form
2237 plurals either using the standard English "-s" suffix, or with
2238 their original Latin or Greek inflections. For example:
2240 PL("stigma") # -> "stigmas" or "stigmata"
2241 PL("torus") # -> "toruses" or "tori"
2242 PL("index") # -> "indexes" or "indices"
2243 PL("millennium") # -> "millenniums" or "millennia"
2244 PL("ganglion") # -> "ganglions" or "ganglia"
2245 PL("octopus") # -> "octopuses" or "octopodes"
2248 Lingua::EN::Inflect caters to such words by providing an
2249 "alternate state" of inflection known as "classical mode".
2250 By default, words are inflected using their contemporary English
2251 plurals, but if classical mode is invoked, the more traditional
2252 plural forms are returned instead.
2254 The exportable subroutine C<classical()> controls this feature.
2255 If C<classical()> is called with no arguments, it unconditionally
2256 invokes classical mode. If it is called with a single argument, it
2257 turns all classical inflections on or off (depending on whether the argument is
2258 true or false). If called with two or more arguments, those arguments
2259 specify which aspects of classical behaviour are to be used.
2263 classical; # SWITCH ON CLASSICAL MODE
2264 print PL("formula"); # -> "formulae"
2266 classical 0; # SWITCH OFF CLASSICAL MODE
2267 print PL("formula"); # -> "formulas"
2269 classical $cmode; # CLASSICAL MODE IFF $cmode
2270 print PL("formula"); # -> "formulae" (IF $cmode)
2271 # -> "formulas" (OTHERWISE)
2273 classical herd=>1; # SWITCH ON CLASSICAL MODE FOR "HERD" NOUNS
2274 print PL("wildebeest"); # -> "wildebeest"
2276 classical names=>1; # SWITCH ON CLASSICAL MODE FOR NAMES
2277 print PL("sally"); # -> "sallies"
2278 print PL("Sally"); # -> "Sallys"
2280 Note however that C<classical()> has no effect on the inflection of words which
2281 are now fully assimilated. Hence:
2283 PL("forum") # ALWAYS -> "forums"
2284 PL("criterion") # ALWAYS -> "criteria"
2286 Lingua::EN::Inflect assumes that a capitalized word is a person's name. So it forms the
2287 plural according to the rules for names (which is that you don't
2288 inflect, you just add -s or -es). You can choose to turn that behaviour
2289 off (it's on by default, even when the module isn't in classical
2290 mode) by calling C<< classical(names=>0) >>.
2292 =head1 USER-DEFINED INFLECTIONS
2294 =head2 Adding plurals at run-time
2296 Lingua::EN::Inflect provides five exportable subroutines which allow
2297 the programmer to override the module's behaviour for specific cases:
2301 =item C<def_noun($$)>
2303 The C<def_noun> subroutine takes a pair of string arguments: the singular and
2304 plural forms of the noun being specified. The singular form
2305 specifies a pattern to be interpolated (as C<m/^(?:$first_arg)$/i>).
2306 Any noun matching this pattern is then replaced by the string in the
2307 second argument. The second argument specifies a string which is
2308 interpolated after the match succeeds, and is then used as the plural
2311 def_noun 'cow' => 'kine';
2312 def_noun '(.+i)o' => '$1i';
2313 def_noun 'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!';
2315 Note that both arguments should usually be specified in single quotes,
2316 so that they are not interpolated when they are specified, but later (when
2317 words are compared to them). As indicated by the last example, care
2318 also needs to be taken with certain characters in the second argument,
2319 to ensure that they are not unintentionally interpolated during comparison.
2321 The second argument string may also specify a second variant of the plural
2322 form, to be used when "classical" plurals have been requested. The beginning
2323 of the second variant is marked by a '|' character:
2325 def_noun 'cow' => 'cows|kine';
2326 def_noun '(.+i)o' => '$1os|$1i';
2327 def_noun 'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!|varmints';
2329 If no classical variant is given, the specified plural form is used in
2330 both normal and "classical" modes.
2332 If the second argument is C<undef> instead of a string, then the
2333 current user definition for the first argument is removed, and the
2334 standard plural inflection(s) restored.
2336 Note that in all cases, later plural definitions for a particular
2337 singular form replace earlier definitions of the same form. For example:
2339 # FIRST, HIDE THE MODERN FORM....
2340 def_noun 'aviatrix' => 'aviatrices';
2342 # LATER, HIDE THE CLASSICAL FORM...
2343 def_noun 'aviatrix' => 'aviatrixes';
2345 # FINALLY, RESTORE THE DEFAULT BEHAVIOUR...
2346 def_noun 'aviatrix' => undef;
2349 Special care is also required when defining general patterns and
2350 associated specific exceptions: put the more specific cases I<after>
2351 the general pattern. For example:
2353 def_noun '(.+)us' => '$1i'; # EVERY "-us" TO "-i"
2354 def_noun 'bus' => 'buses'; # EXCEPT FOR "bus"
2356 This "try-most-recently-defined-first" approach to matching
2357 user-defined words is also used by C<def_verb>, C<def_a> and C<def_an>.
2360 =item C<def_verb($$$$$$)>
2362 The C<def_verb> subroutine takes three pairs of string arguments (that is, six
2363 arguments in total), specifying the singular and plural forms of the three
2364 "persons" of verb. As with C<def_noun>, the singular forms are specifications of
2365 run-time-interpolated patterns, whilst the plural forms are specifications of
2366 (up to two) run-time-interpolated strings:
2368 def_verb 'am' => 'are',
2372 def_verb 'have' => 'have',
2374 'ha(s|th)' => 'have';
2376 Note that as with C<def_noun>, modern/classical variants of plurals
2377 may be separately specified, subsequent definitions replace previous
2378 ones, and C<undef>'ed plural forms revert to the standard behaviour.
2381 =item C<def_adj($$)>
2383 The C<def_adj> subroutine takes a pair of string arguments, which specify
2384 the singular and plural forms of the adjective being defined.
2385 As with C<def_noun> and C<def_verb>, the singular forms are specifications of
2386 run-time-interpolated patterns, whilst the plural forms are specifications of
2387 (up to two) run-time-interpolated strings:
2389 def_adj 'this' => 'these';
2390 def_adj 'red' => 'red|gules';
2392 As previously, modern/classical variants of plurals
2393 may be separately specified, subsequent definitions replace previous
2394 ones, and C<undef>'ed plural forms revert to the standard behaviour.
2397 =item C<def_a($)> and C<def_an($)>
2399 The C<def_a> and C<def_an> subroutines each take a single argument, which
2400 specifies a pattern. If a word passed to C<A()> or C<AN()> matches this
2401 pattern, it will be prefixed (unconditionally) with the corresponding indefinite
2402 article. For example:
2410 As with the other C<def_...> subroutines, such redefinitions are sequential
2411 in effect so that, after the above example, "error" will be inflected with "an".
2415 =head2 The F<$HOME/.inflectrc> file
2417 When it is imported, Lingua::EN::Inflect executes (as Perl code)
2418 the contents of any file named F<.inflectrc> which it finds in the
2419 directory where F<Lingua/EN/Inflect.pm> is installed,
2420 or in the current home directory (C<$ENV{HOME}>), or in both.
2421 Note that the code is executed within the Lingua::EN::Inflect
2424 Hence the user or the local Perl guru can make appropriate calls to
2425 C<def_noun>, C<def_verb>, etc. in one of these F<.inflectrc> files, to
2426 permanently and universally modify the behaviour of the module. For example:
2428 > cat /usr/local/lib/perl5/Lingua/EN/.inflectrc
2430 def_noun "UNIX" => "UN*X|UNICES";
2432 def_verb "teco" => "teco", # LITERALLY: "to edit with TECO"
2436 def_a "Euler.*"; # "Yewler" TURNS IN HIS GRAVE
2439 Note that calls to the C<def_...> subroutines from within a program
2440 will take precedence over the contents of the home directory
2441 F<.inflectrc> file, which in turn takes precedence over the system-wide
2447 On loading, if the Perl code in a F<.inflectrc> file is invalid
2448 (syntactically or otherwise), an appropriate fatal error is issued.
2449 A common problem is not ending the file with something that
2450 evaluates to true (as the five C<def_...> subroutines do).
2452 Using the five C<def_...> subroutines directly in a program may also
2453 result in fatal diagnostics, if a (singular) pattern or an interpolated
2454 (plural) string is somehow invalid.
2456 Specific diagnostics related to user-defined inflections are:
2460 =item C<"Bad user-defined singular pattern:\n\t %s">
2462 The singular form of a user-defined noun, verb, adjective or article pattern
2463 (as defined by a call to C<def_noun>, C<def_verb>, C<def_adj>,
2464 C<def_a> or C<def_an>) is not a valid Perl regular expression. The
2465 actual Perl error message is also given.
2467 =item C<"Bad user-defined plural string: '%s'">
2469 The plural form(s) of a user-defined noun, verb or adjective
2470 (as defined by a call to C<def_noun>, C<def_verb> or C<def_adj>)
2471 is not a valid Perl interpolated string (usually because it
2472 interpolates some undefined variable).
2474 =item C<"Bad .inflectrc file (%s):\n %s">
2476 Some other problem occurred in loading the named local
2477 or global F<.inflectrc> file. The Perl error message (including
2478 the line number) is also given.
2482 There are I<no> diagnosable run-time error conditions for the actual
2483 inflection subroutines (except C<NUMWORDS>), and hence no run-time
2484 diagnostics. If the inflection subroutines are unable to form a plural
2485 via a user-definition or an inbuilt rule, they just "guess" the
2486 commonest English inflection: adding "-s" for nouns, removing "-s" for
2487 verbs, and no inflection for adjectives.
2489 C<Lingua::EN::Inflect::NUMWORDS()> can C<die> with the following messages:
2493 =item C<"Bad grouping option: %s">
2495 The optional argument to C<NUMWORDS()> wasn't 1, 2 or 3.
2497 =item C<"Number out of range">
2499 C<NUMWORDS()> was passed a number larger than
2500 999,999,999,999,999,999,999,999,999,999,999,999 (that is: nine hundred
2501 and ninety-nine decillion, nine hundred and ninety-nine nonillion, nine
2502 hundred and ninety-nine octillion, nine hundred and ninety-nine
2503 septillion, nine hundred and ninety-nine sextillion, nine hundred and
2504 ninety-nine quintillion, nine hundred and ninety-nine quadrillion, nine
2505 hundred and ninety-nine trillion, nine hundred and ninety-nine billion,
2506 nine hundred and ninety-nine million, nine hundred and ninety-nine
2507 thousand, nine hundred and ninety-nine :-)
2509 The problem is that C<NUMWORDS> doesn't know any
2510 words for number components bigger than "decillion".
2515 =head2 2nd Person precedence
2517 If a verb has identical 1st and 2nd person singular forms, but
2518 different 1st and 2nd person plural forms, then when its plural is
2519 constructed, the 2nd person plural form is always preferred.
2521 The author is not currently aware of any such verbs in English, but is
2522 not quite arrogant enough to assume I<ipso facto> that none exist.
2525 =head2 Nominative precedence
2527 The singular pronoun "it" presents a special problem because its plural form
2528 can vary, depending on its "case". For example:
2530 It ate my homework -> They ate my homework
2531 It ate it -> They ate them
2532 I fed my homework to it -> I fed my homework to them
2534 As a consequence of this ambiguity, C<PL()> and C<PL_N()> have been implemented
2535 so that they always return the I<nominative> plural (that is, "they").
2537 However, when asked for the plural of an unambiguously I<accusative>
2538 "it" (namely, C<PL("to it")>, C<PL_N("from it")>, C<PL("with it")>,
2539 etc.), both subroutines will correctly return the accusative plural
2540 ("to them", "from them", "with them", etc.)
2543 =head2 The plurality of zero
2545 The rules governing the choice between:
2547 There were no errors.
2553 are complex and often depend more on I<intent> than on I<content>.
2554 Hence it is infeasible to specify such rules algorithmically.
2556 Therefore, Lingua::EN::Inflect contents itself with the following compromise: If
2557 the governing number is zero, inflections always return the plural form
2558 unless the appropriate "classical" inflection is in effect, in which case the
2559 singular form is always returned.
2564 print inflect "There PL(was) NO(choice)";
2566 produces "There were no choices", whereas:
2568 classical 'zero'; # or: classical(zero=>1);
2570 print inflect "There PL(was) NO(choice)";
2572 it will print "There was no choice".
2575 =head2 Homographs with heterogeneous plurals
2577 Another context in which intent (and not content) sometimes determines
2578 plurality is where two distinct meanings of a word require different
2579 plurals. For example:
2581 Three basses were stolen from the band's equipment trailer.
2582 Three bass were stolen from the band's aquarium.
2584 I put the mice next to the cheese.
2585 I put the mouses next to the computers.
2587 Several thoughts about leaving crossed my mind.
2588 Several thought about leaving across my lawn.
2590 Lingua::EN::Inflect handles such words in two ways:
2596 If both meanings of the word are the I<same> part of speech (for
2597 example, "bass" is a noun in both sentences above), then one meaning
2598 is chosen as the "usual" meaning, and only that meaning's plural is
2599 ever returned by any of the inflection subroutines.
2603 If each meaning of the word is a different part of speech (for
2604 example, "thought" is both a noun and a verb), then the noun's
2605 plural is returned by C<PL()> and C<PL_N()> and the verb's plural is
2606 returned only by C<PL_V()>.
2610 Such contexts are, fortunately, uncommon (particularly
2611 "same-part-of-speech" examples). An informal study of nearly 600
2612 "difficult plurals" indicates that C<PL()> can be relied upon to "get
2613 it right" about 98% of the time (although, of course, ichthyophilic
2614 guitarists or cyber-behaviouralists may experience higher rates of
2617 If the choice of a particular "usual inflection" is considered
2618 inappropriate, it can always be reversed with a preliminary call
2619 to the corresponding C<def_...> subroutine.
2623 I'm not taking any further correspondence on:
2629 Despite the populist pandering of certain New World dictionaries, the
2630 plural is "octopuses" or (for the pedantic classicist) "octopodes". The
2631 suffix "-pus" is Greek, not Latin, so the plural is "-podes", not "-pi".
2636 Had no plural in Latin (possibly because it was a mass noun).
2637 The only plural is the Anglicized "viruses".
2643 Damian Conway (damian@conway.org)
2644 Matthew Persico (ORD inflection)
2647 =head1 BUGS AND IRRITATIONS
2649 The endless inconsistencies of English.
2651 (I<Please> report words for which the correct plural or
2652 indefinite article is not formed, so that the reliability
2653 of Lingua::EN::Inflect can be improved.)
2659 Copyright (c) 1997-2000, Damian Conway. All Rights Reserved.
2660 This module is free software. It may be used, redistributed
2661 and/or modified under the same terms as Perl itself.