moose-class/exercises/t/lib/Lingua/EN/Inflect.pm

   1 package Lingua::EN::Inflect;
   2
   3 use strict;
   4 use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA);
   5 use Env;
   6
   7 require Exporter;
   8 @ISA = qw(Exporter);
   9
  10 $VERSION = '1.89';
  11
  12 %EXPORT_TAGS =
  13 (
  14         ALL =>          [ qw( classical inflect
  15                               PL PL_N PL_V PL_ADJ NO NUM A AN
  16                               PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
  17                               PART_PRES
  18                               ORD
  19                               NUMWORDS
  20                               def_noun def_verb def_adj def_a def_an )],
  21
  22         INFLECTIONS =>  [ qw( classical inflect
  23                               PL PL_N PL_V PL_ADJ PL_eq
  24                               NO NUM A AN PART_PRES )],
  25
  26         PLURALS =>      [ qw( classical inflect
  27                               PL PL_N PL_V PL_ADJ NO NUM
  28                               PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )],
  29
  30         COMPARISONS =>  [ qw( classical
  31                               PL_eq PL_N_eq PL_V_eq PL_ADJ_eq )],
  32
  33         ARTICLES =>     [ qw( classical inflect NUM A AN )],
  34
  35         NUMERICAL =>    [ qw( ORD NUMWORDS )],
  36
  37         USER_DEFINED => [ qw( def_noun def_verb def_adj def_a def_an )],
  38 );
  39
  40 Exporter::export_ok_tags(qw( ALL ));
  41
  42 # SUPPORT CLASSICAL PLURALIZATIONS
  43
  44 my %def_classical = (
  45     all      => 0,
  46     zero     => 0,
  47     herd     => 0,
  48     names    => 1,
  49     persons  => 0,
  50     ancient  => 0,
  51 );
  52
  53 my %all_classical = (
  54     all      => 1,
  55     zero     => 1,
  56     herd     => 1,
  57     names    => 1,
  58     persons  => 1,
  59     ancient  => 1,
  60 );
  61
  62 my %classical = %def_classical;
  63
  64 my $classical_mode = join '|', keys %all_classical;
  65    $classical_mode = qr/^(?:$classical_mode)$/;
  66
  67 sub classical
  68 {
  69     if (!@_) {
  70         %classical = %all_classical;
  71         return;
  72     }
  73     if (@_==1 && $_[0] !~ $classical_mode) {
  74         %classical = $_[0] ? %all_classical : ();
  75         return;
  76     }
  77     while (@_) {
  78         my $arg = shift;
  79         if ($arg !~ $classical_mode) {
  80             die "Unknown classical mode ($arg)\n";
  81         }
  82         if (@_ && $_[0] !~ $classical_mode) { $classical{$arg} = shift; }
  83         else                                { $classical{$arg} = 1;     }
  84
  85         if ($arg eq 'all') {
  86             %classical = $classical{all} ? %all_classical : ();
  87         }
  88     }
  89 }
  90
  91 my $persistent_count;
  92
  93 sub NUM         # (;$count,$show)
  94 {
  95         if (defined $_[0])
  96         {
  97                 $persistent_count = $_[0];
  98                 return $_[0] if !defined($_[1]) || $_[1];
  99         }
 100         else
 101         {
 102                 $persistent_count = undef;
 103         }
 104         return '';
 105 }
 106
 107
 108 # 0. PERFORM GENERAL INFLECTIONS IN A STRING
 109
 110 sub enclose { "(?:$_[0])" }
 111
 112 sub inflect
 113 {
 114         my $save_persistent_count = $persistent_count;
 115         my @sections = split /(NUM\([^)]*\))/, $_[0];
 116         my $inflection = "";
 117
 118         foreach ( @sections )
 119         {
 120                 unless (s/NUM\(\s*?(?:([^),]*)(?:,([^)]*))?)?\)/ NUM($1,$2) /xe)
 121                 {
 122                     1 while
 123                        s/\bPL   \( ([^),]*) (, ([^)]*) )? \)  / PL($1,$3)   /xeg
 124                     || s/\bPL_N \( ([^),]*) (, ([^)]*) )? \)  / PL_N($1,$3) /xeg
 125                     || s/\bPL_V \( ([^),]*) (, ([^)]*) )? \)  / PL_V($1,$3) /xeg
 126                     || s/\bPL_ADJ \( ([^),]*) (, ([^)]*) )? \)  / PL_ADJ($1,$3) /xeg
 127                     || s/\bAN?  \( ([^),]*) (, ([^)]*) )? \)  / A($1,$3)    /xeg
 128                     || s/\bNO   \( ([^),]*) (, ([^)]*) )? \)  / NO($1,$3)   /xeg
 129                     || s/\bORD  \( ([^)]*) \)                 / ORD($1)   /xeg
 130                     || s/\bNUMWORDS  \( ([^)]*) \)            / NUMWORDS($1)   /xeg
 131                     || s/\bPART_PRES  \( ([^)]*) \)            / PART_PRES($1)   /xeg
 132                 }
 133
 134                 $inflection .= $_;
 135         }
 136
 137         $persistent_count = $save_persistent_count;
 138         return $inflection;
 139 }
 140
 141
 142 # 1. PLURALS
 143
 144 my %PL_sb_irregular_s =
 145 (
 146         "corpus"        => "corpuses|corpora",
 147         "opus"          => "opuses|opera",
 148         "genus"         => "genera",
 149         "mythos"        => "mythoi",
 150         "penis"         => "penises|penes",
 151         "testis"        => "testes",
 152         "atlas"         => "atlases|atlantes",
 153 );
 154
 155 my %PL_sb_irregular =
 156 (
 157         "child"         => "children",
 158         "brother"       => "brothers|brethren",
 159         "loaf"          => "loaves",
 160         "hoof"          => "hoofs|hooves",
 161         "beef"          => "beefs|beeves",
 162         "money"         => "monies",
 163         "mongoose"      => "mongooses",
 164         "ox"            => "oxen",
 165         "cow"           => "cows|kine",
 166         "soliloquy"     => "soliloquies",
 167         "graffito"      => "graffiti",
 168         "prima donna"   => "prima donnas|prime donne",
 169         "octopus"       => "octopuses|octopodes",
 170         "genie"         => "genies|genii",
 171         "ganglion"      => "ganglions|ganglia",
 172         "trilby"        => "trilbys",
 173         "turf"          => "turfs|turves",
 174         "numen"         => "numina",
 175     "atman"     => "atmas",
 176         "occiput"       => "occiputs|occipita",
 177
 178         %PL_sb_irregular_s,
 179 );
 180
 181 my $PL_sb_irregular = enclose join '|', keys %PL_sb_irregular;
 182
 183 # CLASSICAL "..is" -> "..ides"
 184
 185 my @PL_sb_C_is_ides =
 186 (
 187 # GENERAL WORDS...
 188
 189         "ephemeris", "iris", "clitoris",
 190         "chrysalis", "epididymis",
 191
 192 # INFLAMATIONS...
 193
 194         ".*itis",
 195
 196 );
 197
 198 my $PL_sb_C_is_ides = enclose join "|", map { substr($_,0,-2) } @PL_sb_C_is_ides;
 199
 200 # CLASSICAL "..a" -> "..ata"
 201
 202 my @PL_sb_C_a_ata =
 203 (
 204         "anathema", "bema", "carcinoma", "charisma", "diploma",
 205         "dogma", "drama", "edema", "enema", "enigma", "lemma",
 206         "lymphoma", "magma", "melisma", "miasma", "oedema",
 207         "sarcoma", "schema", "soma", "stigma", "stoma", "trauma",
 208         "gumma", "pragma",
 209 );
 210
 211 my $PL_sb_C_a_ata = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_a_ata;
 212
 213 # UNCONDITIONAL "..a" -> "..ae"
 214
 215 my $PL_sb_U_a_ae = enclose join "|",
 216 (
 217         "alumna", "alga", "vertebra", "persona"
 218 );
 219
 220 # CLASSICAL "..a" -> "..ae"
 221
 222 my $PL_sb_C_a_ae = enclose join "|",
 223 (
 224         "amoeba", "antenna", "formula", "hyperbola",
 225         "medusa", "nebula", "parabola", "abscissa",
 226         "hydra", "nova", "lacuna", "aurora", ".*umbra",
 227         "flora", "fauna",
 228 );
 229
 230 # CLASSICAL "..en" -> "..ina"
 231
 232 my $PL_sb_C_en_ina = enclose join "|", map { substr($_,0,-2) }
 233 (
 234         "stamen", "foramen", "lumen"
 235 );
 236
 237 # UNCONDITIONAL "..um" -> "..a"
 238
 239 my $PL_sb_U_um_a = enclose join "|", map { substr($_,0,-2) }
 240 (
 241         "bacterium",    "agendum",      "desideratum",  "erratum",
 242         "stratum",      "datum",        "ovum",         "extremum",
 243         "candelabrum",
 244 );
 245
 246 # CLASSICAL "..um" -> "..a"
 247
 248 my $PL_sb_C_um_a = enclose join "|", map { substr($_,0,-2) }
 249 (
 250         "maximum",      "minimum",        "momentum",   "optimum",
 251         "quantum",      "cranium",        "curriculum", "dictum",
 252         "phylum",       "aquarium",       "compendium", "emporium",
 253         "enconium",     "gymnasium",  "honorarium",     "interregnum",
 254         "lustrum",      "memorandum", "millennium",     "rostrum",
 255         "spectrum",     "speculum",       "stadium",    "trapezium",
 256         "ultimatum",    "medium",       "vacuum",       "velum",
 257         "consortium",
 258 );
 259
 260 # UNCONDITIONAL "..us" -> "i"
 261
 262 my $PL_sb_U_us_i = enclose join "|", map { substr($_,0,-2) }
 263 (
 264         "alumnus",      "alveolus",     "bacillus",     "bronchus",
 265         "locus",        "nucleus",      "stimulus",     "meniscus",
 266 );
 267
 268 # CLASSICAL "..us" -> "..i"
 269
 270 my $PL_sb_C_us_i = enclose join "|", map { substr($_,0,-2) }
 271 (
 272         "focus",        "radius",       "genius",
 273         "incubus",      "succubus",     "nimbus",
 274         "fungus",       "nucleolus",    "stylus",
 275         "torus",        "umbilicus",    "uterus",
 276         "hippopotamus",
 277 );
 278
 279 # CLASSICAL "..us" -> "..us"  (ASSIMILATED 4TH DECLENSION LATIN NOUNS)
 280
 281 my $PL_sb_C_us_us = enclose join "|",
 282 (
 283         "status", "apparatus", "prospectus", "sinus",
 284         "hiatus", "impetus", "plexus",
 285 );
 286
 287 # UNCONDITIONAL "..on" -> "a"
 288
 289 my $PL_sb_U_on_a = enclose join "|", map { substr($_,0,-2) }
 290 (
 291         "criterion",    "perihelion",   "aphelion",
 292         "phenomenon",   "prolegomenon", "noumenon",
 293         "organon",      "asyndeton",    "hyperbaton",
 294 );
 295
 296 # CLASSICAL "..on" -> "..a"
 297
 298 my $PL_sb_C_on_a = enclose join "|", map { substr($_,0,-2) }
 299 (
 300         "oxymoron",
 301 );
 302
 303 # CLASSICAL "..o" -> "..i"  (BUT NORMALLY -> "..os")
 304
 305 my @PL_sb_C_o_i =
 306 (
 307         "solo",         "soprano",      "basso",        "alto",
 308         "contralto",    "tempo",        "piano",        "virtuoso",
 309 );
 310 my $PL_sb_C_o_i = enclose join "|", map { substr($_,0,-1) } @PL_sb_C_o_i;
 311
 312 # ALWAYS "..o" -> "..os"
 313
 314 my $PL_sb_U_o_os = enclose join "|",
 315 (
 316         "albino",       "archipelago",  "armadillo",
 317         "commando",     "crescendo",    "fiasco",
 318         "ditto",        "dynamo",       "embryo",
 319         "ghetto",       "guano",        "inferno",
 320         "jumbo",        "lumbago",      "magneto",
 321         "manifesto",    "medico",       "octavo",
 322         "photo",        "pro",          "quarto",
 323         "canto",        "lingo",        "generalissimo",
 324         "stylo",        "rhino",        "casino",
 325         "auto",     "macro",    'zero',
 326
 327         @PL_sb_C_o_i,
 328 );
 329
 330
 331 # UNCONDITIONAL "..[ei]x" -> "..ices"
 332
 333 my $PL_sb_U_ex_ices = enclose join "|", map { substr($_,0,-2) }
 334 (
 335         "codex",        "murex",        "silex",
 336 );
 337
 338 my $PL_sb_U_ix_ices = enclose join "|", map { substr($_,0,-2) }
 339 (
 340         "radix",        "helix",
 341 );
 342
 343 # CLASSICAL "..[ei]x" -> "..ices"
 344
 345 my $PL_sb_C_ex_ices = enclose join "|", map { substr($_,0,-2) }
 346 (
 347         "vortex",       "vertex",       "cortex",       "latex",
 348         "pontifex",     "apex",         "index",        "simplex",
 349 );
 350
 351 my $PL_sb_C_ix_ices = enclose join "|", map { substr($_,0,-2) }
 352 (
 353         "appendix",
 354 );
 355
 356 # ARABIC: ".." -> "..i"
 357
 358 my $PL_sb_C_i = enclose join "|",
 359 (
 360         "afrit",        "afreet",       "efreet",
 361 );
 362
 363 # HEBREW: ".." -> "..im"
 364
 365 my $PL_sb_C_im = enclose join "|",
 366 (
 367         "goy",          "seraph",       "cherub",
 368 );
 369
 370 # UNCONDITIONAL "..man" -> "..mans"
 371
 372 my $PL_sb_U_man_mans = enclose join "|",
 373 qw(
 374         human
 375         Alabaman Bahaman Burman German
 376         Hiroshiman Liman Nakayaman Oklahoman
 377         Panaman Selman Sonaman Tacoman Yakiman
 378         Yokohaman Yuman
 379 );
 380
 381 my @PL_sb_uninflected_s =
 382 (
 383 # PAIRS OR GROUPS SUBSUMED TO A SINGULAR...
 384     "breeches", "britches", "clippers", "gallows", "hijinks",
 385         "headquarters", "pliers", "scissors", "testes", "herpes",
 386         "pincers", "shears", "proceedings", "trousers",
 387
 388 # UNASSIMILATED LATIN 4th DECLENSION
 389
 390         "cantus", "coitus", "nexus",
 391
 392 # RECENT IMPORTS...
 393         "contretemps", "corps", "debris",
 394         ".*ois", "siemens",
 395
 396 # DISEASES
 397         ".*measles", "mumps",
 398
 399 # MISCELLANEOUS OTHERS...
 400         "diabetes", "jackanapes", "series", "species", "rabies",
 401         "chassis", "innings", "news", "mews",
 402 );
 403
 404 my $PL_sb_uninflected_herd = enclose join "|",
 405 # DON'T INFLECT IN CLASSICAL MODE, OTHERWISE NORMAL INFLECTION
 406 (
 407         "wildebeest", "swine", "eland", "bison", "buffalo",
 408         "elk", "moose", "rhinoceros",
 409 );
 410
 411 my $PL_sb_uninflected = enclose join "|",
 412 (
 413 # SOME FISH AND HERD ANIMALS
 414         ".*fish", "tuna", "salmon", "mackerel", "trout",
 415         "bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
 416
 417         ".*deer", ".*sheep",
 418
 419 # ALL NATIONALS ENDING IN -ese
 420         "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
 421         "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
 422         "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
 423         "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
 424         "Shavese", "Vermontese", "Wenchowese", "Yengeese",
 425         ".*[nrlm]ese",
 426
 427 # SOME WORDS ENDING IN ...s (OFTEN PAIRS TAKEN AS A WHOLE)
 428
 429         @PL_sb_uninflected_s,
 430
 431 # DISEASES
 432         ".*pox",
 433
 434
 435 # OTHER ODDITIES
 436         "graffiti", "djinn"
 437 );
 438
 439 # SINGULAR WORDS ENDING IN ...s (ALL INFLECT WITH ...es)
 440
 441 my $PL_sb_singular_s = enclose join '|',
 442 (
 443         ".*ss",
 444         "acropolis", "aegis", "alias", "asbestos", "bathos", "bias",
 445         "bronchitis", "bursitis", "caddis", "cannabis",
 446         "canvas", "chaos", "cosmos", "dais", "digitalis",
 447         "epidermis", "ethos", "eyas", "gas", "glottis",
 448         "hubris", "ibis", "lens", "mantis", "marquis", "metropolis",
 449         "pathos", "pelvis", "polis", "rhinoceros",
 450         "sassafras", "trellis", ".*us", "[A-Z].*es",
 451
 452         @PL_sb_C_is_ides,
 453 );
 454
 455 my $PL_v_special_s = enclose join '|',
 456 (
 457         $PL_sb_singular_s,
 458         @PL_sb_uninflected_s,
 459         keys %PL_sb_irregular_s,
 460         '(.*[csx])is',
 461         '(.*)ceps',
 462         '[A-Z].*s',
 463 );
 464
 465 my %PL_sb_postfix_adj = (
 466         'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
 467         'martial' => [qw(court)],
 468 );
 469
 470 foreach (keys %PL_sb_postfix_adj) {
 471         $PL_sb_postfix_adj{$_} = enclose
 472                                  enclose(join('|', @{$PL_sb_postfix_adj{$_}}))
 473                                . "(?=(?:-|\\s+)$_)";
 474 }
 475
 476 my $PL_sb_postfix_adj = '(' . join('|', values %PL_sb_postfix_adj) . ')(.*)';
 477
 478 my $PL_sb_military = 'major|lieutenant|brigadier|adjutant|quartermaster';
 479 my $PL_sb_general = '((?!'.$PL_sb_military.').*?)((-|\s+)general)';
 480
 481 my $PL_prep = enclose join '|', qw (
 482         about above across after among around at athwart before behind
 483         below beneath beside besides between betwixt beyond but by
 484         during except for from in into near of off on onto out over
 485         since till to under until unto upon with
 486 );
 487
 488 my $PL_sb_prep_dual_compound = '(.*?)((?:-|\s+)(?:'.$PL_prep.'|d[eu])(?:-|\s+))a(?:-|\s+)(.*)';
 489
 490 my $PL_sb_prep_compound = '(.*?)((-|\s+)('.$PL_prep.'|d[eu])((-|\s+)(.*))?)';
 491
 492
 493 my %PL_pron_nom =
 494 (
 495 #       NOMINATIVE              REFLEXIVE
 496
 497 "i"     => "we",        "myself"   =>   "ourselves",
 498 "you"   => "you",       "yourself" =>   "yourselves",
 499 "she"   => "they",      "herself"  =>   "themselves",
 500 "he"    => "they",      "himself"  =>   "themselves",
 501 "it"    => "they",      "itself"   =>   "themselves",
 502 "they"  => "they",      "themself" =>   "themselves",
 503
 504 #       POSSESSIVE
 505
 506 "mine"   => "ours",
 507 "yours"  => "yours",
 508 "hers"   => "theirs",
 509 "his"    => "theirs",
 510 "its"    => "theirs",
 511 "theirs" => "theirs",
 512 );
 513
 514 my %PL_pron_acc =
 515 (
 516 #       ACCUSATIVE              REFLEXIVE
 517
 518 "me"    => "us",        "myself"   =>   "ourselves",
 519 "you"   => "you",       "yourself" =>   "yourselves",
 520 "her"   => "them",      "herself"  =>   "themselves",
 521 "him"   => "them",      "himself"  =>   "themselves",
 522 "it"    => "them",      "itself"   =>   "themselves",
 523 "them"  => "them",      "themself" =>   "themselves",
 524 );
 525
 526 my $PL_pron_acc = enclose join '|', keys %PL_pron_acc;
 527
 528 my %PL_v_irregular_pres =
 529 (
 530 #       1st PERS. SING.         2ND PERS. SING.         3RD PERS. SINGULAR
 531 #                               3RD PERS. (INDET.)
 532
 533 "am"    => "are",       "are"   => "are",       "is"     => "are",
 534 "was"   => "were",      "were"  => "were",      "was"    => "were",
 535 "have"  => "have",      "have"  => "have",      "has"    => "have",
 536 "do"    => "do",        "do"    => "do",        "does"   => "do",
 537 );
 538
 539 my $PL_v_irregular_pres = enclose join '|', keys %PL_v_irregular_pres;
 540
 541 my %PL_v_ambiguous_pres =
 542 (
 543 #       1st PERS. SING.         2ND PERS. SING.         3RD PERS. SINGULAR
 544 #                               3RD PERS. (INDET.)
 545
 546 "act"   => "act",       "act"   => "act",       "acts"    => "act",
 547 "blame" => "blame",     "blame" => "blame",     "blames"  => "blame",
 548 "can"   => "can",       "can"   => "can",       "can"     => "can",
 549 "must"  => "must",      "must"  => "must",      "must"    => "must",
 550 "fly"   => "fly",       "fly"   => "fly",       "flies"   => "fly",
 551 "copy"  => "copy",      "copy"  => "copy",      "copies"  => "copy",
 552 "drink" => "drink",     "drink" => "drink",     "drinks"  => "drink",
 553 "fight" => "fight",     "fight" => "fight",     "fights"  => "fight",
 554 "fire"  => "fire",      "fire"  => "fire",      "fires"   => "fire",
 555 "like"  => "like",      "like"  => "like",      "likes"   => "like",
 556 "look"  => "look",      "look"  => "look",      "looks"   => "look",
 557 "make"  => "make",      "make"  => "make",      "makes"   => "make",
 558 "reach" => "reach",     "reach" => "reach",     "reaches" => "reach",
 559 "run"   => "run",       "run"   => "run",       "runs"    => "run",
 560 "sink"  => "sink",      "sink"  => "sink",      "sinks"   => "sink",
 561 "sleep" => "sleep",     "sleep" => "sleep",     "sleeps"  => "sleep",
 562 "view"  => "view",      "view"  => "view",      "views"   => "view",
 563 );
 564
 565 my $PL_v_ambiguous_pres = enclose join '|', keys %PL_v_ambiguous_pres;
 566
 567
 568 my $PL_v_irregular_non_pres = enclose join '|',
 569 (
 570 "did", "had", "ate", "made", "put",
 571 "spent", "fought", "sank", "gave", "sought",
 572 "shall", "could", "ought", "should",
 573 );
 574
 575 my $PL_v_ambiguous_non_pres = enclose join '|',
 576 (
 577 "thought", "saw", "bent", "will", "might", "cut",
 578 );
 579
 580 # "..oes" -> "..oe" (the rest are "..oes" -> "o")
 581
 582 my $PL_v_oes_oe = enclose join "|",
 583 qw(
 584         .*shoes  .*hoes  .*toes
 585         canoes   floes   oboes  roes  throes  woes
 586 );
 587
 588 my $PL_count_zero = enclose join '|',
 589 (
 590 0, "no", "zero", "nil"
 591 );
 592
 593 my $PL_count_one = enclose join '|',
 594 (
 595 1, "a", "an", "one", "each", "every", "this", "that",
 596 );
 597
 598 my %PL_adj_special =
 599 (
 600 "a"    => "some",       "an"   =>  "some",
 601 "this" => "these",      "that" => "those",
 602 );
 603 my $PL_adj_special = enclose join '|', keys %PL_adj_special;
 604
 605 my %PL_adj_poss =
 606 (
 607 "my"    => "our",
 608 "your"  => "your",
 609 "its"   => "their",
 610 "her"   => "their",
 611 "his"   => "their",
 612 "their" => "their",
 613 );
 614 my $PL_adj_poss = enclose join '|', keys %PL_adj_poss;
 615
 616
 617 sub checkpat
 618 {
 619 local $SIG{__WARN__} = sub {0};
 620 do {$@ =~ s/at.*?$//;
 621     die "\nBad user-defined singular pattern:\n\t$@\n"}
 622         if (!eval "'' =~ m/$_[0]/; 1;" or $@);
 623 return @_;
 624 }
 625
 626 sub checkpatsubs
 627 {
 628 checkpat($_[0]);
 629 if (defined $_[1])
 630 {
 631         local $SIG{__WARN__} = sub {0};
 632         do {$@ =~ s/at.*?$//;
 633             die "\nBad user-defined plural string: '$_[1]'\n\t$@\n"}
 634                 if (!eval "qq{$_[1]}; 1;" or $@);
 635 }
 636 return @_;
 637 }
 638
 639 my @PL_sb_user_defined = ();
 640 my @PL_v_user_defined  = ();
 641 my @PL_adj_user_defined  = ();
 642 my @A_a_user_defined   = ();
 643
 644 sub def_noun
 645 {
 646         unshift @PL_sb_user_defined, checkpatsubs(@_);
 647         return 1;
 648 }
 649
 650 sub def_verb
 651 {
 652         unshift @PL_v_user_defined, checkpatsubs(@_[4,5]);
 653         unshift @PL_v_user_defined, checkpatsubs(@_[2,3]);
 654         unshift @PL_v_user_defined, checkpatsubs(@_[0,1]);
 655         return 1;
 656 }
 657
 658 sub def_adj
 659 {
 660         unshift @PL_adj_user_defined, checkpatsubs(@_);
 661         return 1;
 662 }
 663
 664 sub def_a
 665 {
 666 unshift @A_a_user_defined, checkpat(@_,'a');
 667 return 1;
 668 }
 669
 670 sub def_an
 671 {
 672 unshift @A_a_user_defined, checkpat(@_,'an');
 673 return 1;
 674 }
 675
 676 sub ud_match
 677 {
 678 my $word = shift;
 679 for (my $i=0; $i < @_; $i+=2)
 680 {
 681         if ($word =~ /^(?:$_[$i])$/i)
 682         {
 683                 last unless defined $_[$i+1];
 684                 return eval '"'.$_[$i+1].'"';
 685         }
 686 }
 687 return undef;
 688 }
 689
 690 do
 691 {
 692 local $SIG{__WARN__} = sub {0};
 693 my $rcfile;
 694
 695 $rcfile = $INC{'Lingua//EN/Inflect.pm'} || '';
 696 $rcfile =~ s/Inflect.pm$/.inflectrc/;
 697 do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n"
 698 if $rcfile && -r $rcfile && -s $rcfile;
 699
 700 $rcfile = "$ENV{HOME}/.inflectrc" || '';
 701 do $rcfile or die "\nBad .inflectrc file ($rcfile):\n\t$@\n"
 702 if $rcfile && -r $rcfile && -s $rcfile;
 703 };
 704
 705 sub postprocess         # FIX PEDANTRY AND CAPITALIZATION :-)
 706 {
 707 my ($orig, $inflected) = @_;
 708 $inflected =~ s/([^|]+)\|(.+)/ $classical{all}?$2:$1 /e;
 709 return $orig =~ /^I$/   ? $inflected
 710  : $orig =~ /^[A-Z]+$/  ? uc $inflected
 711  : $orig =~ /^[A-Z]/    ? ucfirst $inflected
 712  :                        $inflected;
 713 }
 714
 715 sub PL
 716 #   PL($word,$number)
 717 {
 718 my ($str, $count) = @_;
 719 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
 720 return $str unless $word;
 721 my $plural = postprocess $word,  _PL_special_adjective($word,$count)
 722                           || _PL_special_verb($word,$count)
 723                           || _PL_noun($word,$count);
 724 return $pre.$plural.$post;
 725 }
 726
 727 sub PL_N
 728 #   PL_N($word,$number)
 729 {
 730 my ($str, $count) = @_;
 731 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
 732 return $str unless $word;
 733 my $plural = postprocess $word, _PL_noun($word,$count);
 734 return $pre.$plural.$post;
 735 }
 736
 737 sub PL_V
 738 #   PL_V($word,$number)
 739 {
 740 my ($str, $count) = @_;
 741 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
 742 return $str unless $word;
 743 my $plural = postprocess $word, _PL_special_verb($word,$count)
 744                           || _PL_general_verb($word,$count);
 745 return $pre.$plural.$post;
 746 }
 747
 748 sub PL_ADJ
 749 #   PL_ADJ($word,$number)
 750 {
 751 my ($str, $count) = @_;
 752 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
 753 return $str unless $word;
 754 my $plural = postprocess $word, _PL_special_adjective($word,$count)
 755                           || $word;
 756 return $pre.$plural.$post;
 757 }
 758
 759 sub PL_eq         { _PL_eq(@_, \&PL); }
 760 sub PL_N_eq       { _PL_eq(@_, \&PL_N); }
 761 sub PL_V_eq       { _PL_eq(@_, \&PL_V); }
 762 sub PL_ADJ_eq     { _PL_eq(@_, \&PL_ADJ); }
 763
 764 sub _PL_eq
 765 {
 766 my ( $word1, $word2, $PL ) = @_;
 767 my %classval = %classical;
 768 %classical = %all_classical;
 769 my $result = "";
 770 $result = "eq"  if !$result && $word1 eq $word2;
 771 $result = "p:s" if !$result && $word1 eq &$PL($word2);
 772 $result = "s:p" if !$result && &$PL($word1) eq $word2;
 773 %classical = ();
 774 $result = "p:s" if !$result && $word1 eq &$PL($word2);
 775 $result = "s:p" if !$result && &$PL($word1) eq $word2;
 776 %classical = %classval;
 777
 778 if ($PL == \&PL || $PL == \&PL_N)
 779 {
 780         $result = "p:p"
 781                 if !$result && _PL_check_plurals_N($word1,$word2);
 782         $result = "p:p"
 783                 if !$result && _PL_check_plurals_N($word2,$word1);
 784 }
 785 if ($PL == \&PL || $PL == \&PL_ADJ)
 786 {
 787         $result = "p:p"
 788                 if !$result && _PL_check_plurals_ADJ($word1,$word2,$PL);
 789 }
 790
 791 return $result;
 792 }
 793
 794 sub _PL_reg_plurals
 795 {
 796         $_[0] =~ /($_[1])($_[2]\|\1$_[3]|$_[3]\|\1$_[2])/
 797 }
 798
 799 sub _PL_check_plurals_N
 800 {
 801 my $pair = "$_[0]|$_[1]";
 802 foreach ( values %PL_sb_irregular_s )   { return 1 if $_ eq $pair; }
 803 foreach ( values %PL_sb_irregular )     { return 1 if $_ eq $pair; }
 804
 805 return 1 if _PL_reg_plurals($pair, $PL_sb_C_a_ata,   "as","ata")
 806          || _PL_reg_plurals($pair, $PL_sb_C_is_ides, "is","ides")
 807          || _PL_reg_plurals($pair, $PL_sb_C_a_ae,    "s","e")
 808          || _PL_reg_plurals($pair, $PL_sb_C_en_ina,  "ens","ina")
 809          || _PL_reg_plurals($pair, $PL_sb_C_um_a,    "ums","a")
 810          || _PL_reg_plurals($pair, $PL_sb_C_us_i,    "uses","i")
 811          || _PL_reg_plurals($pair, $PL_sb_C_on_a,    "ons","a")
 812          || _PL_reg_plurals($pair, $PL_sb_C_o_i,     "os","i")
 813          || _PL_reg_plurals($pair, $PL_sb_C_ex_ices, "exes","ices")
 814          || _PL_reg_plurals($pair, $PL_sb_C_ix_ices, "ixes","ices")
 815          || _PL_reg_plurals($pair, $PL_sb_C_i,       "s","i")
 816          || _PL_reg_plurals($pair, $PL_sb_C_im,      "s","im")
 817
 818          || _PL_reg_plurals($pair, '.*eau',       "s","x")
 819          || _PL_reg_plurals($pair, '.*ieu',       "s","x")
 820          || _PL_reg_plurals($pair, '.*tri',       "xes","ces")
 821          || _PL_reg_plurals($pair, '.{2,}[yia]n', "xes","ges");
 822
 823
 824 return 0;
 825 }
 826
 827 sub _PL_check_plurals_ADJ
 828 {
 829 my ( $word1a, $word2a ) = @_;
 830 my ( $word1b, $word2b ) = @_;
 831
 832 $word1a = '' unless $word1a =~ s/'s?$//;
 833 $word2a = '' unless $word2a =~ s/'s?$//;
 834 $word1b = '' unless $word1b =~ s/s'$//;
 835 $word2b = '' unless $word2b =~ s/s'$//;
 836
 837 if ($word1a)
 838 {
 839         return 1 if $word2a && ( _PL_check_plurals_N($word1a, $word2a)
 840                                 || _PL_check_plurals_N($word2a, $word1a) );
 841         return 1 if $word2b && ( _PL_check_plurals_N($word1a, $word2b)
 842                                 || _PL_check_plurals_N($word2b, $word1a) );
 843 }
 844 if ($word1b)
 845 {
 846         return 1 if $word2a && ( _PL_check_plurals_N($word1b, $word2a)
 847                                 || _PL_check_plurals_N($word2a, $word1b) );
 848         return 1 if $word2b && ( _PL_check_plurals_N($word1b, $word2b)
 849                                 || _PL_check_plurals_N($word2b, $word1b) );
 850 }
 851
 852
 853 return "";
 854 }
 855
 856 sub _PL_noun
 857 {
 858 my ( $word, $count ) = @_;
 859 my $value;                              # UTILITY VARIABLE
 860
 861 # DEFAULT TO PLURAL
 862
 863 $count = $persistent_count
 864         if !defined($count) && defined($persistent_count);
 865
 866 $count = (defined $count and $count=~/^($PL_count_one)$/io
 867          or defined $count and $classical{zero}
 868          and $count=~/^($PL_count_zero)$/io)
 869             ? 1
 870             : 2;
 871
 872 return $word if $count==1;
 873
 874 # HANDLE USER-DEFINED NOUNS
 875
 876 return $value if defined($value = ud_match($word, @PL_sb_user_defined));
 877
 878
 879 # HANDLE EMPTY WORD, SINGULAR COUNT AND UNINFLECTED PLURALS
 880
 881 $word eq ''                     and return $word;
 882
 883 $word =~ /^($PL_sb_uninflected)$/i
 884                                 and return $word;
 885
 886 $classical{herd} and $word =~ /^($PL_sb_uninflected_herd)$/i
 887                                 and return $word;
 888
 889
 890 # HANDLE COMPOUNDS ("Governor General", "mother-in-law", "aide-de-camp", ETC.)
 891
 892 $word =~ /^(?:$PL_sb_postfix_adj)$/i
 893                                 and $value = $2
 894                                 and return _PL_noun($1,2)
 895                                            . $value;
 896
 897 $word =~ /^(?:$PL_sb_prep_dual_compound)$/i
 898                                 and $value = [$2,$3]
 899                                 and return _PL_noun($1,2)
 900                                            . $value->[0]
 901                                            . _PL_noun($value->[1]);
 902
 903 $word =~ /^(?:$PL_sb_prep_compound)$/i
 904                                 and $value = $2
 905                                 and return _PL_noun($1,2)
 906                                            . $value;
 907
 908 # HANDLE PRONOUNS
 909
 910 $word =~ /^((?:$PL_prep)\s+)($PL_pron_acc)$/i
 911                                 and return $1.$PL_pron_acc{lc($2)};
 912
 913 $value = $PL_pron_nom{lc($word)}
 914                                 and return $value;
 915
 916 $word =~ /^($PL_pron_acc)$/i
 917                                 and return $PL_pron_acc{lc($1)};
 918
 919 # HANDLE ISOLATED IRREGULAR PLURALS
 920
 921 $word =~ /(.*)\b($PL_sb_irregular)$/i
 922                                 and return $1 . $PL_sb_irregular{lc $2};
 923 $word =~ /($PL_sb_U_man_mans)$/i
 924                                 and return "$1s";
 925 $word =~ /(\S*)(person)$/i and return $classical{persons}?"$1persons":"$1people";
 926
 927 # HANDLE FAMILIES OF IRREGULAR PLURALS
 928
 929 $word =~ /(.*)man$/i            and return "$1men";
 930 $word =~ /(.*[ml])ouse$/i       and return "$1ice";
 931 $word =~ /(.*)goose$/i          and return "$1geese";
 932 $word =~ /(.*)tooth$/i          and return "$1teeth";
 933 $word =~ /(.*)foot$/i           and return "$1feet";
 934
 935 # HANDLE UNASSIMILATED IMPORTS
 936
 937 $word =~ /(.*)ceps$/i           and return $word;
 938 $word =~ /(.*)zoon$/i           and return "$1zoa";
 939 $word =~ /(.*[csx])is$/i        and return "$1es";
 940 $word =~ /($PL_sb_U_ex_ices)ex$/i       and return "$1ices";
 941 $word =~ /($PL_sb_U_ix_ices)ix$/i       and return "$1ices";
 942 $word =~ /($PL_sb_U_um_a)um$/i  and return "$1a";
 943 $word =~ /($PL_sb_U_us_i)us$/i  and return "$1i";
 944 $word =~ /($PL_sb_U_on_a)on$/i  and return "$1a";
 945 $word =~ /($PL_sb_U_a_ae)$/i    and return "$1e";
 946
 947 # HANDLE INCOMPLETELY ASSIMILATED IMPORTS
 948
 949 if ($classical{ancient})
 950 {
 951     $word =~ /(.*)trix$/i               and return "$1trices";
 952     $word =~ /(.*)eau$/i                and return "$1eaux";
 953     $word =~ /(.*)ieu$/i                and return "$1ieux";
 954     $word =~ /(.{2,}[yia])nx$/i         and return "$1nges";
 955     $word =~ /($PL_sb_C_en_ina)en$/i    and return "$1ina";
 956     $word =~ /($PL_sb_C_ex_ices)ex$/i   and return "$1ices";
 957     $word =~ /($PL_sb_C_ix_ices)ix$/i   and return "$1ices";
 958     $word =~ /($PL_sb_C_um_a)um$/i      and return "$1a";
 959     $word =~ /($PL_sb_C_us_i)us$/i      and return "$1i";
 960     $word =~ /($PL_sb_C_us_us)$/i       and return "$1";
 961     $word =~ /($PL_sb_C_a_ae)$/i        and return "$1e";
 962     $word =~ /($PL_sb_C_a_ata)a$/i      and return "$1ata";
 963     $word =~ /($PL_sb_C_is_ides)is$/i   and return "$1ides";
 964     $word =~ /($PL_sb_C_o_i)o$/i        and return "$1i";
 965     $word =~ /($PL_sb_C_on_a)on$/i      and return "$1a";
 966     $word =~ /$PL_sb_C_im$/i            and return "${word}im";
 967     $word =~ /$PL_sb_C_i$/i             and return "${word}i";
 968 }
 969
 970
 971 # HANDLE SINGULAR NOUNS ENDING IN ...s OR OTHER SILIBANTS
 972
 973 $word =~ /^($PL_sb_singular_s)$/i       and return "$1es";
 974 $word =~ /^([A-Z].*s)$/                 and $classical{names} and return "$1es";
 975 $word =~ /(.*)([cs]h|[zx])$/i           and return "$1$2es";
 976 # $word =~ /(.*)(us)$/i                 and return "$1$2es";
 977
 978 # HANDLE ...f -> ...ves
 979
 980 $word =~ /(.*[eao])lf$/i        and return "$1lves";
 981 $word =~ /(.*[^d])eaf$/i        and return "$1eaves";
 982 $word =~ /(.*[nlw])ife$/i       and return "$1ives";
 983 $word =~ /(.*)arf$/i            and return "$1arves";
 984
 985 # HANDLE ...y
 986
 987 $word =~ /(.*[aeiou])y$/i       and return "$1ys";
 988 $word =~ /([A-Z].*y)$/          and $classical{names} and return "$1s";
 989 $word =~ /(.*)y$/i              and return "$1ies";
 990
 991 # HANDLE ...o
 992
 993 $word =~ /$PL_sb_U_o_os$/i      and return "${word}s";
 994 $word =~ /[aeiou]o$/i           and return "${word}s";
 995 $word =~ /o$/i                  and return "${word}es";
 996
 997
 998 # OTHERWISE JUST ADD ...s
 999
1000 return "${word}s";
1001 }
1002
1003
1004 sub _PL_special_verb
1005 {
1006 my ( $word, $count ) = @_;
1007 $count = $persistent_count
1008         if !defined($count) && defined($persistent_count);
1009 $count = (defined $count and $count=~/^($PL_count_one)$/io or
1010           defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1011        : 2;
1012
1013 return undef if $count=~/^($PL_count_one)$/io;
1014
1015 my $value;                              # UTILITY VARIABLE
1016
1017 # HANDLE USER-DEFINED VERBS
1018
1019 return $value if defined($value = ud_match($word, @PL_v_user_defined));
1020
1021 # HANDLE IRREGULAR PRESENT TENSE (SIMPLE AND COMPOUND)
1022
1023 $word =~ /^($PL_v_irregular_pres)((\s.*)?)$/i
1024                 and return $PL_v_irregular_pres{lc $1}.$2;
1025
1026 # HANDLE IRREGULAR FUTURE, PRETERITE AND PERFECT TENSES
1027
1028 $word =~ /^($PL_v_irregular_non_pres)((\s.*)?)$/i
1029                 and return $word;
1030
1031 # HANDLE PRESENT NEGATIONS (SIMPLE AND COMPOUND)
1032
1033 $word =~ /^($PL_v_irregular_pres)(n't(\s.*)?)$/i
1034                 and return $PL_v_irregular_pres{lc $1}.$2;
1035
1036 $word =~ /^\S+n't\b/i
1037                 and return $word;
1038
1039 # HANDLE SPECIAL CASES
1040
1041 $word =~ /^($PL_v_special_s)$/          and return undef;
1042 $word =~ /\s/                           and return undef;
1043
1044 # HANDLE STANDARD 3RD PERSON (CHOP THE ...(e)s OFF SINGLE WORDS)
1045
1046 $word =~ /^(.*)([cs]h|[x]|zz|ss)es$/i   and return "$1$2";
1047
1048 $word =~ /^(..+)ies$/i                  and return "$1y";
1049
1050 $word =~ /($PL_v_oes_oe)$/              and return substr($1,0,-1);
1051 $word =~ /^(.+)oes$/i                   and return "$1o";
1052
1053 $word =~ /^(.*[^s])s$/i                 and return $1;
1054
1055 # OTHERWISE, A REGULAR VERB (HANDLE ELSEWHERE)
1056
1057 return undef;
1058 }
1059
1060 sub _PL_general_verb
1061 {
1062 my ( $word, $count ) = @_;
1063 $count = $persistent_count
1064         if !defined($count) && defined($persistent_count);
1065 $count = (defined $count and $count=~/^($PL_count_one)$/io or
1066           defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1067        : 2;
1068
1069 return $word if $count=~/^($PL_count_one)$/io;
1070
1071 # HANDLE AMBIGUOUS PRESENT TENSES  (SIMPLE AND COMPOUND)
1072
1073 $word =~ /^($PL_v_ambiguous_pres)((\s.*)?)$/i
1074                 and return $PL_v_ambiguous_pres{lc $1}.$2;
1075
1076 # HANDLE AMBIGUOUS PRETERITE AND PERFECT TENSES
1077
1078 $word =~ /^($PL_v_ambiguous_non_pres)((\s.*)?)$/i
1079                 and return $word;
1080
1081 # OTHERWISE, 1st OR 2ND PERSON IS UNINFLECTED
1082
1083 return $word;
1084
1085 }
1086
1087 sub _PL_special_adjective
1088 {
1089 my ( $word, $count ) = @_;
1090 $count = $persistent_count
1091         if !defined($count) && defined($persistent_count);
1092 $count = (defined $count and $count=~/^($PL_count_one)$/io or
1093           defined $count and $classical{zero} and $count=~/^($PL_count_zero)$/io) ? 1
1094        : 2;
1095
1096 return $word if $count=~/^($PL_count_one)$/io;
1097
1098
1099 # HANDLE USER-DEFINED ADJECTIVES
1100
1101 my $value;
1102 return $value if defined($value = ud_match($word, @PL_adj_user_defined));
1103
1104 # HANDLE KNOWN CASES
1105
1106 $word =~ /^($PL_adj_special)$/i
1107                         and return $PL_adj_special{lc $1};
1108
1109 # HANDLE POSSESSIVES
1110
1111 $word =~ /^($PL_adj_poss)$/i
1112                         and return $PL_adj_poss{lc $1};
1113
1114 $word =~ /^(.*)'s?$/    and do { my $pl = PL_N($1);
1115                                  return "$pl'" . ($pl =~ m/s$/ ? "" : "s");
1116                                };
1117
1118 # OTHERWISE, NO IDEA
1119
1120 return undef;
1121
1122 }
1123
1124
1125 # 2. INDEFINITE ARTICLES
1126
1127 # THIS PATTERN MATCHES STRINGS OF CAPITALS STARTING WITH A "VOWEL-SOUND"
1128 # CONSONANT FOLLOWED BY ANOTHER CONSONANT, AND WHICH ARE NOT LIKELY
1129 # TO BE REAL WORDS (OH, ALL RIGHT THEN, IT'S JUST MAGIC!)
1130
1131 my $A_abbrev = q{
1132 (?! FJO | [HLMNS]Y.  | RY[EO] | SQU
1133   | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
1134 [FHLMNRSX][A-Z]
1135 };
1136
1137 # THIS PATTERN CODES THE BEGINNINGS OF ALL ENGLISH WORDS BEGINING WITH A
1138 # 'y' FOLLOWED BY A CONSONANT. ANY OTHER Y-CONSONANT PREFIX THEREFORE
1139 # IMPLIES AN ABBREVIATION.
1140
1141 my $A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)';
1142
1143 # EXCEPTIONS TO EXCEPTIONS
1144
1145 my $A_explicit_an = enclose join '|',
1146 (
1147 "euler",
1148 "hour(?!i)", "heir", "honest", "hono",
1149 );
1150
1151 sub A
1152 {
1153 my ($str, $count) = @_;
1154 my ($pre, $word, $post) = ( $str =~ m/\A(\s*)(?:an?\s+)?(.+?)(\s*)\Z/i );
1155 return $str unless $word;
1156 my $result = _indef_article($word,$count);
1157 return $pre.$result.$post;
1158 }
1159
1160 sub AN { goto &A }
1161
1162 sub _indef_article
1163 {
1164 my ( $word, $count ) = @_;
1165
1166 $count = $persistent_count
1167         if !defined($count) && defined($persistent_count);
1168
1169 return "$count $word"
1170         if defined $count && $count!~/^($PL_count_one)$/io;
1171
1172 # HANDLE USER-DEFINED VARIANTS
1173
1174 my $value;
1175 return $value if defined($value = ud_match($word, @A_a_user_defined));
1176
1177 # HANDLE SPECIAL CASES
1178
1179 $word =~ /^($A_explicit_an)/i           and return "an $word";
1180
1181 # HANDLE ABBREVIATIONS
1182
1183 $word =~ /^($A_abbrev)/ox               and return "an $word";
1184 $word =~ /^[aefhilmnorsx][.-]/i         and return "an $word";
1185 $word =~ /^[a-z][.-]/i                  and return "a $word";
1186
1187 # HANDLE CONSONANTS
1188
1189 $word =~ /^[^aeiouy]/i          and return "a $word";
1190
1191 # HANDLE SPECIAL VOWEL-FORMS
1192
1193 $word =~ /^e[uw]/i                      and return "a $word";
1194 $word =~ /^onc?e\b/i                    and return "a $word";
1195 $word =~ /^uni([^nmd]|mo)/i             and return "a $word";
1196 $word =~ /^u[bcfhjkqrst][aeiou]/i       and return "a $word";
1197
1198 # HANDLE SPECIAL CAPITALS
1199
1200 $word =~ /^U[NK][AIEO]?/                and return "a $word";
1201
1202 # HANDLE VOWELS
1203
1204 $word =~ /^[aeiou]/i            and return "an $word";
1205
1206 # HANDLE y... (BEFORE CERTAIN CONSONANTS IMPLIES (UNNATURALIZED) "i.." SOUND)
1207
1208 $word =~ /^($A_y_cons)/io       and return "an $word";
1209
1210 # OTHERWISE, GUESS "a"
1211                                     return "a $word";
1212 }
1213
1214 # 2. TRANSLATE ZERO-QUANTIFIED $word TO "no PL($word)"
1215
1216 sub NO
1217 {
1218 my ($str, $count) = @_;
1219 my ($pre, $word, $post) = ($str =~ m/\A(\s*)(.+?)(\s*)\Z/);
1220
1221 $count = $persistent_count
1222         if !defined($count) && defined($persistent_count);
1223 $count = 0 unless $count;
1224
1225 return "$pre$count " . PL($word,$count) . $post
1226         unless $count =~ /^$PL_count_zero$/;
1227 return "${pre}no ". PL($word,0) . $post ;
1228 }
1229
1230
1231 # PARTICIPLES
1232
1233 sub PART_PRES
1234 {
1235         local $_ = PL_V(shift,2);
1236            s/ie$/y/
1237         or s/ue$/u/
1238         or s/([auy])e$/$1/
1239         or s/ski$/ski/
1240         or s/i$//
1241         or s/([^e])e$/$1/
1242         or m/er$/
1243         or s/([^aeiou][aeiouy]([bdgmnprst]))$/$1$2/;
1244         return "${_}ing";
1245 }
1246
1247
1248
1249 # NUMERICAL INFLECTIONS
1250
1251 my %nth =
1252 (
1253         0 => 'th',
1254         1 => 'st',
1255         2 => 'nd',
1256         3 => 'rd',
1257         4 => 'th',
1258         5 => 'th',
1259         6 => 'th',
1260         7 => 'th',
1261         8 => 'th',
1262         9 => 'th',
1263         11 => 'th',
1264         12 => 'th',
1265         13 => 'th',
1266 );
1267
1268
1269 my %ordinal;
1270 @ordinal{qw(ty    one   two    three five  eight  nine  twelve )}=
1271          qw(tieth first second third fifth eighth ninth twelfth);
1272
1273 my $ordinal_suff = join '|', keys %ordinal, "";
1274
1275 $ordinal{""} = 'th';
1276
1277 sub ORD($)
1278 {
1279         my $num = shift;
1280         if ($num =~ /\d/) {
1281                 return $num . ($nth{$num%100} || $nth{$num%10});
1282         }
1283         else {
1284                 $num =~ s/($ordinal_suff)\Z/$ordinal{$1}/;
1285                 return $num;
1286         }
1287 }
1288
1289
1290 my %default_args =
1291 (
1292         'group'   => 0,
1293         'comma'   => ',',
1294         'and'     => 'and',
1295         'zero'    => 'zero',
1296         'one'     => 'one',
1297         'decimal' => 'point',
1298 );
1299
1300 my @unit = ('',qw(one two three four five six seven eight nine));
1301 my @teen = qw(ten eleven twelve thirteen fourteen
1302               fifteen sixteen seventeen eighteen nineteen);
1303 my @ten  = ('','',qw(twenty thirty forty fifty sixty seventy eighty ninety));
1304 my @mill = map { (my $val=$_) =~ s/_/illion/; " $val" }
1305            ('',qw(thousand m_ b_ tr_ quadr_ quint_ sext_ sept_ oct_ non_ dec_));
1306
1307
1308 sub mill { my $ind = $_[0]||0;
1309            die "Number out of range\n" if $ind > $#mill;
1310            return $ind<@mill ? $mill[$ind] : ' ???illion'; }
1311
1312 sub unit { return $unit[$_[0]]. mill($_[1]); }
1313
1314 sub ten
1315 {
1316         return $ten[$_[0]] . ($_[0]&&$_[1]?'-':'') . $unit[$_[1]] . mill($_[2])
1317                 if $_[0] ne '1';
1318         return $teen[$_[1]]. $mill[$_[2]||0];
1319 }
1320
1321 sub hund
1322 {
1323         return unit($_[0]) . " hundred" . ($_[1] || $_[2] ? " $_[4] " : '')
1324              . ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[0];
1325         return ten($_[1],$_[2]) . mill($_[3]) . ', ' if $_[1] || $_[2];
1326         return '';
1327 }
1328
1329
1330 sub enword
1331 {
1332         my ($num,$group,$zero,$one,$comma,$and) = @_;
1333
1334         if ($group==1)
1335         {
1336                 $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /eg;
1337         }
1338         elsif ($group==2)
1339         {
1340                 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /eg;
1341                 $num =~ s/(\d)/ ($1 ? unit($1) :" $zero")."$comma " /e;
1342         }
1343         elsif ($group==3)
1344         {
1345                 $num =~ s/(\d)(\d)(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")." ".($2 ? ten($2,$3) : $3 ? " $zero " . unit($3) : " $zero $zero") . "$comma " /eg;
1346                 $num =~ s/(\d)(\d)/ ($1 ? ten($1,$2) : $2 ? " $zero " . unit($2) : " $zero $zero") . "$comma " /e;
1347                 $num =~ s/(\d)/ ($1==1 ? " $one" : $1 ? unit($1) :" $zero")."$comma " /e;
1348         }
1349         elsif ($num+0==0) {
1350                 $num = $zero;
1351         }
1352         elsif ($num+0==1) {
1353                 $num = $one;
1354         }
1355         else {
1356                 $num =~ s/\A\s*0+//;
1357                 my $mill = 0;
1358                 1 while $num =~ s/(\d)(\d)(\d)(?=\D*\Z)/ hund($1,$2,$3,$mill++,$and) /e;
1359                 $num =~ s/(\d)(\d)(?=\D*\Z)/ ten($1,$2,$mill)."$comma " /e;
1360                 $num =~ s/(\d)(?=\D*\Z)/ unit($1,$mill) . "$comma "/e;
1361         }
1362         return $num;
1363 }
1364
1365 sub NUMWORDS
1366 {
1367         my $num = shift;
1368         my %arg = ( %default_args, @_ );
1369         my $group = $arg{group};
1370
1371         die "Bad chunking option: $group\n" unless $group =~ /\A[0-3]\Z/;
1372         my $sign = ($num =~ /\A\s*\+/) ? "plus"
1373                  : ($num =~ /\A\s*\-/) ? "minus"
1374                  : '';
1375
1376         my ($zero, $one) = @arg{'zero','one'};
1377         my $comma = $arg{comma};
1378         my $and = $arg{'and'};
1379
1380         my $ord = $num =~ s/(st|nd|rd|th)\Z//;
1381         my @chunks = ($arg{decimal})
1382                         ? $group ? split(/\./, $num) : split(/\./, $num, 2)
1383                         : ($num);
1384
1385         my $first = 1;
1386
1387         if ($chunks[0] eq '') { $first=0; shift @chunks; }
1388
1389         foreach ( @chunks )
1390         {
1391                 s/\D//g;
1392                 $_ = '0' unless $_;
1393
1394                 if (!$group && !$first) { $_ = enword($_,1,$zero,$one,$comma,$and) }
1395                 else                    { $_ = enword($_,$group,$zero,$one,$comma,$and) }
1396
1397                 s/, \Z//;
1398                 s/\s+,/,/g;
1399                 s/, (\S+)\s+\Z/ $and $1/ if !$group and $first;
1400                 s/\s+/ /g;
1401                 s/(\A\s|\s\Z)//g;
1402                 $first = '' if $first;
1403         }
1404
1405         my @numchunks = ();
1406         if ($first =~ /0/)
1407         {
1408                 unshift @chunks, '';
1409         }
1410         else
1411         {
1412                 @numchunks = split /\Q$comma /, $chunks[0];
1413         }
1414
1415         $numchunks[-1] =~ s/($ordinal_suff)\Z/$ordinal{$1}/
1416                 if $ord and @numchunks;
1417
1418         foreach (@chunks[1..$#chunks])
1419         {
1420                 push @numchunks, $arg{decimal};
1421                 push @numchunks, split /\Q$comma /;
1422         }
1423
1424         if (wantarray)
1425         {
1426                 unshift @numchunks, $sign if $sign;
1427                 return @numchunks
1428         }
1429         elsif ($group)
1430         {
1431                 return ($sign?"$sign ":'') .  join ", ", @numchunks;
1432         }
1433         else
1434         {
1435                 $num = ($sign?"$sign ":'') . shift @numchunks;
1436                 $first = ($num !~ /$arg{decimal}\Z/);
1437                 foreach ( @numchunks )
1438                 {
1439                         if (/\A$arg{decimal}\Z/)
1440                         {
1441                                 $num .= " $_";
1442                                 $first = 0;
1443                         }
1444                         elsif ($first)
1445                         {
1446                                 $num .= "$comma $_";
1447                         }
1448                         else
1449                         {
1450                                 $num .= " $_";
1451                         }
1452                 }
1453                 return $num;
1454         }
1455 }
1456
1457 1;
1458
1459 __END__
1460
1461 =head1 NAME
1462
1463 Lingua::EN::Inflect - Convert singular to plural. Select "a" or "an".
1464
1465 =head1 VERSION
1466
1467 This document describes version 1.86 of Lingua::EN::Inflect,
1468 released October 20, 2000.
1469
1470 =head1 SYNOPSIS
1471
1472  use Lingua::EN::Inflect qw ( PL PL_N PL_V PL_ADJ NO NUM
1473                               PL_eq PL_N_eq PL_V_eq PL_ADJ_eq
1474                               A AN
1475                               PART_PRES
1476                               ORD NUMWORDS
1477                               inflect classical
1478                               def_noun def_verb def_adj def_a def_an );
1479
1480
1481  # UNCONDITIONALLY FORM THE PLURAL
1482
1483       print "The plural of ", $word, " is ", PL($word), "\n";
1484
1485
1486  # CONDITIONALLY FORM THE PLURAL
1487
1488       print "I saw $cat_count ", PL("cat",$cat_count), "\n";
1489
1490
1491  # FORM PLURALS FOR SPECIFIC PARTS OF SPEECH
1492
1493       print PL_N("I",$N1), PL_V("saw",$N1),
1494             PL_ADJ("my",$N2), PL_N("saw",$N2), "\n";
1495
1496
1497  # DEAL WITH "0/1/N" -> "no/1/N" TRANSLATION:
1498
1499       print "There ", PL_V("was",$errors), NO(" error",$errors), "\n";
1500
1501
1502  # USE DEFAULT COUNTS:
1503
1504       print NUM($N1,""), PL("I"), PL_V(" saw"), NUM($N2), PL_N(" saw");
1505       print "There ", NUM($errors,''), PL_V("was"), NO(" error"), "\n";
1506
1507
1508  # COMPARE TWO WORDS "NUMBER-INSENSITIVELY":
1509
1510       print "same\n"      if PL_eq($word1, $word2);
1511       print "same noun\n" if PL_eq_N($word1, $word2);
1512       print "same verb\n" if PL_eq_V($word1, $word2);
1513       print "same adj.\n" if PL_eq_ADJ($word1, $word2);
1514
1515
1516  # ADD CORRECT "a" OR "an" FOR A GIVEN WORD:
1517
1518       print "Did you want ", A($thing), " or ", AN($idea), "\n";
1519
1520
1521  # CONVERT NUMERALS INTO ORDINALS (i.e. 1->1st, 2->2nd, 3->3rd, etc.)
1522
1523       print "It was", ORD($position), " from the left\n";
1524
1525  # CONVERT NUMERALS TO WORDS (i.e. 1->"one", 101->"one hundred and one", etc.)
1526  # IN A SCALAR CONTEXT: GET BACK A SINGLE STRING...
1527
1528     $words = NUMWORDS(1234);      # "one thousand, two hundred and thirty-four"
1529     $words = NUMWORDS(ORD(1234)); # "one thousand, two hundred and thirty-fourth"
1530
1531
1532  # IN A LIST CONTEXT: GET BACK A LIST OF STRINGSi, ONE FOR EACH "CHUNK"...
1533
1534     @words = NUMWORDS(1234);    # ("one thousand","two hundred and thirty-four")
1535
1536
1537  # OPTIONAL PARAMETERS CHANGE TRANSLATION:
1538
1539     $words = NUMWORDS(12345, group=>1);
1540                                 # "one, two, three, four, five"
1541
1542     $words = NUMWORDS(12345, group=>2);
1543                                 # "twelve, thirty-four, five"
1544
1545     $words = NUMWORDS(12345, group=>3);
1546                                 # "one twenty-three, forty-five"
1547
1548     $words = NUMWORDS(1234, 'and'=>'');
1549                                 # "one thousand, two hundred thirty-four"
1550
1551     $words = NUMWORDS(1234, 'and'=>', plus');
1552                                 # "one thousand, two hundred, plus thirty-four"
1553
1554     $words = NUMWORDS(555_1202, group=>1, zero=>'oh');
1555                                 # "five, five, five, one, two, oh, two"
1556
1557     $words = NUMWORDS(555_1202, group=>1, one=>'unity');
1558                                 # "five, five, five, unity, two, oh, two"
1559
1560     $words = NUMWORDS(123.456, group=>1, decimal=>'mark');
1561                                 # "one two three mark four five six"
1562
1563
1564  # REQUIRE "CLASSICAL" PLURALS (EG: "focus"->"foci", "cherub"->"cherubim")
1565
1566       classical;              # USE ALL CLASSICAL PLURALS
1567
1568       classical 1;               #  USE ALL CLASSICAL PLURALS
1569       classical 0;               #  USE ALL MODERN PLURALS (DEFAULT)
1570
1571       classical 'zero';      #  "no error" INSTEAD OF "no errors"
1572       classical zero=>1;     #  "no error" INSTEAD OF "no errors"
1573       classical zero=>0;     #  "no errors" INSTEAD OF "no error"
1574
1575       classical 'herd';      #  "2 buffalo" INSTEAD OF "2 buffalos"
1576       classical herd=>1;     #  "2 buffalo" INSTEAD OF "2 buffalos"
1577       classical herd=>0;     #  "2 buffalos" INSTEAD OF "2 buffalo"
1578
1579       classical 'persons';   # "2 chairpersons" INSTEAD OF "2 chairpeople"
1580       classical persons=>1;  # "2 chairpersons" INSTEAD OF "2 chairpeople"
1581       classical persons=>0;  # "2 chairpeople" INSTEAD OF "2 chairpersons"
1582
1583       classical 'ancient';   # "2 formulae" INSTEAD OF "2 formulas"
1584       classical ancient=>1;  # "2 formulae" INSTEAD OF "2 formulas"
1585       classical ancient=>0;  # "2 formulas" INSTEAD OF "2 formulae"
1586
1587
1588
1589  # INTERPOLATE "PL()", "PL_N()", "PL_V()", "PL_ADJ()", A()", "AN()"
1590  # "NUM()" AND "ORD()" WITHIN STRINGS:
1591
1592       print inflect("The plural of $word is PL($word)\n");
1593       print inflect("I saw $cat_count PL("cat",$cat_count)\n");
1594       print inflect("PL(I,$N1) PL_V(saw,$N1) PL(a,$N2) PL_N(saw,$N2)");
1595       print inflect("NUM($N1,)PL(I) PL_V(saw) NUM($N2,)PL(a) PL_N(saw)");
1596       print inflect("I saw NUM($cat_count) PL("cat")\nNUM()");
1597       print inflect("There PL_V(was,$errors) NO(error,$errors)\n");
1598       print inflect("There NUM($errors,) PL_V(was) NO(error)\n";
1599       print inflect("Did you want A($thing) or AN($idea)\n");
1600       print inflect("It was ORD($position) from the left\n");
1601
1602
1603  # ADD USER-DEFINED INFLECTIONS (OVERRIDING INBUILT RULES):
1604
1605       def_noun  "VAX"  => "VAXen";      # SINGULAR => PLURAL
1606
1607       def_verb  "will" => "shall",      # 1ST PERSON SINGULAR => PLURAL
1608                 "will" => "will",       # 2ND PERSON SINGULAR => PLURAL
1609                 "will" => "will",       # 3RD PERSON SINGULAR => PLURAL
1610
1611       def_adj   "hir"  => "their",      # SINGULAR => PLURAL
1612
1613       def_a     "h"                     # "AY HALWAYS SEZ 'HAITCH'!"
1614
1615       def_an    "horrendous.*"          # "AN HORRENDOUS AFFECTATION"
1616
1617
1618 =head1 DESCRIPTION
1619
1620 The exportable subroutines of Lingua::EN::Inflect provide plural
1621 inflections, "a"/"an" selection for English words, and manipulation
1622 of numbers as words
1623
1624 Plural forms of all nouns, most verbs, and some adjectives are
1625 provided. Where appropriate, "classical" variants (for example: "brother" ->
1626 "brethren", "dogma" -> "dogmata", etc.) are also provided.
1627
1628 Pronunciation-based "a"/"an" selection is provided for all English
1629 words, and most initialisms.
1630
1631 It is also possible to inflect numerals (1,2,3) to ordinals (1st, 2nd, 3rd)
1632 and to english words ("one", "two", "three).
1633
1634 In generating these inflections, Lingua::EN::Inflect follows the Oxford
1635 English Dictionary and the guidelines in Fowler's Modern English
1636 Usage, preferring the former where the two disagree.
1637
1638 The module is built around standard British spelling, but is designed
1639 to cope with common American variants as well. Slang, jargon, and
1640 other English dialects are I<not> explicitly catered for.
1641
1642 Where two or more inflected forms exist for a single word (typically a
1643 "classical" form and a "modern" form), Lingua::EN::Inflect prefers the
1644 more common form (typically the "modern" one), unless "classical"
1645 processing has been specified
1646 (see L<"MODERN VS CLASSICAL INFLECTIONS">).
1647
1648 =head1 FORMING PLURALS
1649
1650 =head2 Inflecting Plurals
1651
1652 All of the C<PL_...> plural inflection subroutines take the word to be
1653 inflected as their first argument and return the corresponding inflection.
1654 Note that all such subroutines expect the I<singular> form of the word. The
1655 results of passing a plural form are undefined (and unlikely to be correct).
1656
1657 The C<PL_...> subroutines also take an optional second argument,
1658 which indicates the grammatical "number" of the word (or of another word
1659 with which the word being inflected must agree). If the "number" argument is
1660 supplied and is not C<1> (or C<"one"> or C<"a">, or some other adjective that
1661 implies the singular), the plural form of the word is returned. If the
1662 "number" argument I<does> indicate singularity, the (uninflected) word
1663 itself is returned. If the number argument is omitted, the plural form
1664 is returned unconditionally.
1665
1666 The various subroutines are:
1667
1668 =over 8
1669
1670 =item C<PL_N($;$)>
1671
1672 The exportable subroutine C<PL_N()> takes a I<singular> English noun or
1673 pronoun and returns its plural. Pronouns in the nominative ("I" ->
1674 "we") and accusative ("me" -> "us") cases are handled, as are
1675 possessive pronouns ("mine" -> "ours").
1676
1677
1678 =item C<PL_V($;$)>
1679
1680 The exportable subroutine C<PL_V()> takes the I<singular> form of a
1681 conjugated verb (that is, one which is already in the correct "person"
1682 and "mood") and returns the corresponding plural conjugation.
1683
1684
1685 =item C<PL_ADJ($;$)>
1686
1687 The exportable subroutine C<PL_ADJ()> takes the I<singular> form of
1688 certain types of adjectives and returns the corresponding plural form.
1689 Adjectives that are correctly handled include: "numerical" adjectives
1690 ("a" -> "some"), demonstrative adjectives ("this" -> "these", "that" ->
1691 "those"), and possessives ("my" -> "our", "cat's" -> "cats'", "child's"
1692 -> "childrens'", etc.)
1693
1694
1695 =item C<PL($;$)>
1696
1697 The exportable subroutine C<PL()> takes a I<singular> English noun,
1698 pronoun, verb, or adjective and returns its plural form. Where a word
1699 has more than one inflection depending on its part of speech (for
1700 example, the noun "thought" inflects to "thoughts", the verb "thought"
1701 to "thought"), the (singular) noun sense is preferred to the (singular)
1702 verb sense.
1703
1704 Hence C<PL("knife")> will return "knives" ("knife" having been treated
1705 as a singular noun), whereas C<PL("knifes")> will return "knife"
1706 ("knifes" having been treated as a 3rd person singular verb).
1707
1708 The inherent ambiguity of such cases suggests that,
1709 where the part of speech is known, C<PL_N>, C<PL_V>, and
1710 C<PL_ADJ> should be used in preference to C<PL>.
1711
1712 =back
1713
1714 Note that all these subroutines ignore any whitespace surrounding the
1715 word being inflected, but preserve that whitespace when the result is
1716 returned. For example, C<S<PL(" cat  ")>> returns S<" cats  ">.
1717
1718
1719 =head2 Numbered plurals
1720
1721 The C<PL_...> subroutines return only the inflected word, not the count that
1722 was used to inflect it. Thus, in order to produce "I saw 3 ducks", it
1723 is necessary to use:
1724
1725         print "I saw $N ", PL_N($animal,$N), "\n";
1726
1727 Since the usual purpose of producing a plural is to make it agree with
1728 a preceding count, Lingua::EN::Inflect provides an exportable subroutine
1729 (C<NO($;$)>) which, given a word and a(n optional) count, returns the
1730 count followed by the correctly inflected word. Hence the previous
1731 example can be rewritten:
1732
1733         print "I saw ", NO($animal,$N), "\n";
1734
1735 In addition, if the count is zero (or some other term which implies
1736 zero, such as C<"zero">, C<"nil">, etc.) the count is replaced by the
1737 word "no". Hence, if C<$N> had the value zero, the previous example
1738 would print the somewhat more elegant:
1739
1740         I saw no animals
1741
1742 rather than:
1743
1744         I saw 0 animals
1745
1746 Note that the name of the subroutine is a pun: the subroutine
1747 returns either a number (a I<No.>) or a C<"no">, in front of the
1748 inflected word.
1749
1750
1751 =head2 Reducing the number of counts required
1752
1753 In some contexts, the need to supply an explicit count to the various
1754 C<PL_...> subroutines makes for tiresome repetition. For example:
1755
1756         print PL_ADJ("This",$errors), PL_N(" error",$errors),
1757               PL_V(" was",$errors), " fatal.\n";
1758
1759 Lingua::EN::Inflect therefore provides an exportable subroutine
1760 (C<NUM($;$)>) which may be used to set a persistent "default number"
1761 value. If such a value is set, it is subsequently used whenever an
1762 optional second "number" argument is omitted. The default value thus set
1763 can subsequently be removed by calling C<NUM()> with no arguments.
1764 Hence we could rewrite the previous example:
1765
1766         NUM($errors);
1767         print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n";
1768         NUM();
1769
1770 Normally, C<NUM()> returns its first argument, so that it may also
1771 be "inlined" in contexts like:
1772
1773         print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n"
1774         print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n"
1775                 if $severity > 1;
1776
1777 However, in certain contexts (see L<"INTERPOLATING INFLECTIONS IN STRINGS">)
1778 it is preferable that C<NUM()> return an empty string. Hence C<NUM()>
1779 provides an optional second argument. If that argument is supplied (that is, if
1780 it is defined) and evaluates to false, C<NUM> returns an empty string
1781 instead of its first argument. For example:
1782
1783         print NUM($errors,0), NO("error"), PL_V(" was"), " detected.\n";
1784         print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n"
1785                 if $severity > 1;
1786
1787
1788
1789 =head2 Number-insensitive equality
1790
1791 Lingua::EN::Inflect also provides a solution to the problem
1792 of comparing words of differing plurality through the exportable subroutines
1793 C<PL_eq($$)>, C<PL_N_eq($$)>, C<PL_V_eq($$)>, and C<PL_ADJ_eq($$)>.
1794 Each  of these subroutines takes two strings, and  compares them
1795 using the corresponding plural-inflection subroutine (C<PL()>, C<PL_N()>,
1796 C<PL_V()>, and C<PL_ADJ()> respectively).
1797
1798 The comparison returns true if:
1799
1800 =over 8
1801
1802 =item *
1803
1804 the strings are C<eq>-equal, or
1805
1806 =item *
1807
1808 one string is C<eq>-equal to a plural form of the other, or
1809
1810 =item *
1811
1812 the strings are two different plural forms of the one word.
1813
1814 =back
1815
1816 Hence all of the following return true:
1817
1818         PL_eq("index","index")          # RETURNS "eq"
1819         PL_eq("index","indexes")        # RETURNS "s:p"
1820         PL_eq("index","indices")        # RETURNS "s:p"
1821         PL_eq("indexes","index")        # RETURNS "p:s"
1822         PL_eq("indices","index")        # RETURNS "p:s"
1823         PL_eq("indices","indexes")      # RETURNS "p:p"
1824         PL_eq("indexes","indices")      # RETURNS "p:p"
1825         PL_eq("indices","indices")      # RETURNS "eq"
1826
1827 As indicated by the comments in the previous example, the actual value
1828 returned by the various C<PL_eq_...> subroutines encodes which of the
1829 three equality rules succeeded: "eq" is returned if the strings were
1830 identical, "s:p" if the strings were singular and plural respectively,
1831 "p:s" for plural and singular, and "p:p" for two distinct plurals.
1832 Inequality is indicated by returning an empty string.
1833
1834 It should be noted that two distinct singular words which happen to take
1835 the same plural form are I<not> considered equal, nor are cases where
1836 one (singular) word's plural is the other (plural) word's singular.
1837 Hence all of the following return false:
1838
1839         PL_eq("base","basis")       # ALTHOUGH BOTH -> "bases"
1840         PL_eq("syrinx","syringe")   # ALTHOUGH BOTH -> "syringes"
1841         PL_eq("she","he")           # ALTHOUGH BOTH -> "they"
1842
1843         PL_eq("opus","operas")      # ALTHOUGH "opus" -> "opera" -> "operas"
1844         PL_eq("taxi","taxes")       # ALTHOUGH "taxi" -> "taxis" -> "taxes"
1845
1846 Note too that, although the comparison is "number-insensitive" it is I<not>
1847 case-insensitive (that is, C<PL("time","Times")> returns false. To obtain
1848 both number and case insensitivity, prefix both arguments with C<lc>
1849 (that is, C<PL(lc "time", lc "Times")> returns true).
1850
1851
1852 =head1 OTHER VERB FORMS
1853
1854 =head2 Present participles
1855
1856 C<Lingua::EN::Inflect> also provides the C<PART_PRES> subroutine,
1857 which can take a 3rd person singular verb and
1858 correctly inflect it to its present participle:
1859
1860         PART_PRES("runs")       # "running"
1861         PART_PRES("loves")      # "loving"
1862         PART_PRES("eats")       # "eating"
1863         PART_PRES("bats")       # "batting"
1864         PART_PRES("spies")      # "spying"
1865
1866
1867 =head1 PROVIDING INDEFINITE ARTICLES
1868
1869 =head2 Selecting indefinite articles
1870
1871 Lingua::EN::Inflect provides two exportable subroutines (C<A($;$)> and
1872 C<AN($;$)>) which will correctly prepend the appropriate indefinite
1873 article to a word, depending on its pronunciation. For example:
1874
1875         A("cat")                # -> "a cat"
1876         AN("cat")               # -> "a cat"
1877         A("euphemism")          # -> "a euphemism"
1878         A("Euler number")       # -> "an Euler number"
1879         A("hour")               # -> "an hour"
1880         A("houri")              # -> "a houri"
1881
1882 The two subroutines are I<identical> in function and may be used
1883 interchangeably. The only reason that two versions are provided is to
1884 enhance the readability of code such as:
1885
1886         print "That is ", AN($errortype), " error\n;
1887         print "That is ", A($fataltype), " fatal error\n;
1888
1889 Note that in both cases the actual article provided depends I<only> on
1890 the pronunciation of the first argument, I<not> on the name of the
1891 subroutine.
1892
1893 C<A()> and C<AN()> will ignore any indefinite article that already
1894 exists at the start of the string. Thus:
1895
1896         @half_arked = (
1897                 "a elephant",
1898                 "a giraffe",
1899                 "an ewe",
1900                 "a orangutan",
1901         );
1902
1903         print A($_), "\n" for @half_arked;
1904
1905         # prints:
1906         #     an elephant
1907         #     a giraffe
1908         #     a ewe
1909         #     an orangutan
1910
1911
1912 C<A()> and C<AN()> both take an optional second argument. As with the
1913 C<PL_...> subroutines, this second argument is a "number" specifier. If
1914 its value is C<1> (or some other value implying singularity), C<A()> and
1915 C<AN()> insert "a" or "an" as appropriate. If the number specifier
1916 implies plurality, (C<A()> and C<AN()> insert the actual second argument instead.
1917 For example:
1918
1919         A("cat",1)              # -> "a cat"
1920         A("cat",2)              # -> "2 cat"
1921         A("cat","one")          # -> "one cat"
1922         A("cat","no")           # -> "no cat"
1923
1924 Note that, as implied by the previous examples, C<A()> and
1925 C<AN()> both assume that their job is merely to provide the correct
1926 qualifier for a word (that is: "a", "an", or the specified count).
1927 In other words, they assume that the word they are given has
1928 already been correctly inflected for plurality. Hence, if C<$N>
1929 has the value 2, then:
1930
1931       print A("cat",$N);
1932
1933 prints "2 cat", instead of "2 cats". The correct approach is to use:
1934
1935       print A(PL("cat",$N),$N);
1936
1937 or, better still:
1938
1939       print NO("cat",$N);
1940
1941 Note too that, like the various C<PL_...> subroutines, whenever C<A()>
1942 and C<AN()> are called with only one argument they are subject to the
1943 effects of any preceding call to C<NUM()>. Hence, another possible
1944 solution is:
1945
1946       NUM($N);
1947       print A(PL("cat"));
1948
1949
1950 =head2 Indefinite articles and initialisms
1951
1952 "Initialisms" (sometimes inaccurately called "acronyms") are terms which
1953 have been formed from the initial letters of words in a phrase (for
1954 example, "NATO", "NBL", "S.O.S.", "SCUBA", etc.)
1955
1956 Such terms present a particular challenge when selecting between "a"
1957 and "an", since they are sometimes pronounced as if they were a single
1958 word ("nay-tow", "sku-ba") and sometimes as a series of letter names
1959 ("en-eff-ell", "ess-oh-ess").
1960
1961 C<A()> and C<AN()> cope with this dichotomy using a series of inbuilt
1962 rules, which may be summarized as:
1963
1964 =over 8
1965
1966 =item 1.
1967
1968 If the word starts with a single letter, followed by a period or dash
1969 (for example, "R.I.P.", "C.O.D.", "e-mail", "X-ray", "T-square"), then
1970 choose the appropriate article for the I<sound> of the first letter
1971 ("an R.I.P.", "a C.O.D.", "an e-mail", "an X-ray", "a T-square").
1972
1973 =item 2.
1974
1975 If the first two letters of the word are capitals,
1976 consonants, and do not appear at the start of any known English word,
1977 (for example, "LCD", "XML", "YWCA"), then once again choose "a" or
1978 "an" depending on the I<sound> of the first letter ("an LCD", "an
1979 XML", "a YWCA").
1980
1981 =item 3.
1982
1983 Otherwise, assume the string is a capitalized word or a
1984 pronounceable initialism (for example, "LED", "OPEC", "FAQ", "UNESCO"), and
1985 therefore takes "a" or "an" according to the (apparent) pronunciation of
1986 the entire word ("a LED", "an OPEC", "a FAQ", "a UNESCO").
1987
1988 =back
1989
1990 Note that rules 1 and 3 together imply that the presence or absence of
1991 punctuation may change the selection of indefinite article for a
1992 particular initialism (for example, "a FAQ" but "an F.A.Q.").
1993
1994
1995 =head2 Indefinite articles and "soft H's"
1996
1997 Words beginning in the letter 'H' present another type of difficulty
1998 when selecting a suitable indefinite article. In a few such words
1999 (for example, "hour", "honour", "heir") the 'H' is not voiced at
2000 all, and so such words inflect with "an". The remaining cases
2001 ("voiced H's") may be divided into two categories:
2002 "hard H's" (such as "hangman", "holograph", "hat", etc.) and
2003 "soft H's" (such as "hysterical", "horrendous", "holy", etc.)
2004
2005 Hard H's always take "a" as their indefinite article, and soft
2006 H's normally do so as well. But I<some> English speakers prefer
2007 "an" for soft H's (although the practice is now generally considered an
2008 affectation, rather than a legitimate grammatical alternative).
2009
2010 At present, the C<A()> and C<AN()> subroutines ignore soft H's and use
2011 "a" for any voiced 'H'. The author would, however, welcome feedback on
2012 this decision (envisaging a possible future "soft H" mode).
2013
2014
2015 =head1 INFLECTING ORDINALS
2016
2017 Occasionally it is useful to present an integer value as an ordinal
2018 rather than as a numeral. For example:
2019
2020         Enter password (1st attempt): ********
2021         Enter password (2nd attempt): *********
2022         Enter password (3rd attempt): *********
2023         No 4th attempt. Access denied.
2024
2025 To this end, Lingua::EN::Inflect provides the C<ORD()> subroutine.
2026 <ORD()> takes a single argument and forms its ordinal equivalent.
2027 If the argument isn't a numerical integer, it just adds "-th".
2028
2029
2030 =head1 CONVERTING NUMBERS TO WORDS
2031
2032 The exportable subroutine C<NUMWORDS> takes a number (cardinal or ordinal)
2033 and returns an English representation of that number. In a scalar context
2034 a string is returned. Hence:
2035
2036         use Lingua::EN::Inflect qw( NUMWORDS );
2037
2038         $words = NUMWORDS(1234567);
2039
2040 puts the string:
2041
2042         "one million, two hundred and thirty-four thousand, five hundred and sixty-seven"
2043
2044 into $words.
2045
2046 In a list context each comma-separated chunk is returned as a separate element.
2047 Hence:
2048
2049         @words = NUMWORDS(1234567);
2050
2051 puts the list:
2052
2053         ("one million",
2054          "two hundred and thirty-four thousand",
2055          "five hundred and sixty-seven")
2056
2057 into @words.
2058
2059 Non-digits (apart from an optional leading plus or minus sign,
2060 any decimal points, and ordinal suffixes -- see below) are silently
2061 ignored, so the following all produce identical results:
2062
2063         NUMWORDS(5551202);
2064         NUMWORDS(5_551_202);
2065         NUMWORDS("5,551,202");
2066         NUMWORDS("555-1202");
2067
2068 That last case is a little awkward since it's almost certainly a phone number,
2069 and "five million, five hundred and fifty-one thousand, two hundred and two"
2070 probably isn't what's wanted.
2071
2072 To overcome this, C<NUMWORDS()> takes an optional named argument, 'group',
2073 which changes how numbers are translated. The argument must be a
2074 positive integer less than four, which indicated how the digits of the
2075 number are to be grouped. If the argument is C<1>, then each digit is
2076 translated separately. If the argument is C<2>, pairs of digits
2077 (starting from the I<left>) are grouped together. If the argument is
2078 C<3>, triples of numbers (again, from the I<left>) are grouped. Hence:
2079
2080         NUMWORDS("555-1202", group=>1)
2081
2082 returns C<"five, five, five, one, two, zero, two">, whilst:
2083
2084         NUMWORDS("555-1202", group=>2)
2085
2086 returns C<"fifty-five, fifty-one, twenty, two">, and:
2087
2088         NUMWORDS("555-1202", group=>3)
2089
2090 returns C<"five fifty-five, one twenty, two">.
2091
2092 Phone numbers are often written in words as
2093 C<"five..five..five..one..two..zero..two">, which is also easy to
2094 achieve:
2095
2096         join '..', NUMWORDS("555-1202", group=>1)
2097
2098 C<NUMWORDS> also handles decimal fractions. Hence:
2099
2100         NUMWORDS("1.2345")
2101
2102 returns C<"one point two three four five"> in a scalar context
2103 and C<("one","point","two","three","four","five")>) in an array context.
2104 Exponent form (C<"1.234e56">) is not yet handled.
2105
2106 Multiple decimal points are only translated in one of the "grouping" modes.
2107 Hence:
2108
2109         NUMWORDS(101.202.303)
2110
2111 returns C<"one hundred and one point two zero two three zero three">,
2112 whereas:
2113
2114         NUMWORDS(101.202.303, group=>1)
2115
2116 returns C<"one zero one point two zero two point three zero three">.
2117
2118 The digit C<'0'> is unusual in that in may be translated to English as "zero",
2119 "oh", or "nought". To cater for this diversity, C<NUMWORDS> may be passed
2120 a named argument, 'zero', which may be set to
2121 the desired translation of C<'0'>. For example:
2122
2123         print join "..", NUMWORDS("555-1202", group=>3, zero=>'oh')
2124
2125 prints C<"five..five..five..one..two..oh..two">.
2126 By default, zero is rendered as "zero".
2127
2128 Likewise, the digit C<'1'> may be rendered as "one" or "a/an" (or very
2129 occasionally other variants), depending on the context. So there is a
2130 C<'one'> argument as well:
2131
2132         print NUMWORDS($_, one=>'a solitary', zero=>'no more'),
2133               PL(" bottle of beer on the wall\n", $_)
2134                    for (3,2,1,0);
2135
2136         # prints:
2137         #     three bottles of beer on the wall
2138         #     two bottles of beer on the wall
2139         #     a solitary bottle of beer on the wall
2140         #     no more bottles of beer on the wall
2141
2142 Care is needed if the word "a/an" is to be used as a C<'one'> value.
2143 Unless the next word is known in advance, it's almost always necessary
2144 to use the C<A> function as well:
2145
2146         print A( NUMWORDS(1, one=>'a') . " $_\n")
2147              for qw(cat aardvark ewe hour);
2148
2149         # prints:
2150         #     a cat
2151         #     an aardvark
2152         #     a ewe
2153         #     an hour
2154
2155 Another major regional variation in number translation is the use of
2156 "and" in certain contexts. The named argument 'and'
2157 allows the programmer to specify how "and" should be handled. Hence:
2158
2159         print scalar NUMWORDS("765", 'and'=>'')
2160
2161 prints "seven hundred sixty-five", instead of "seven hundred and sixty-five".
2162 By default, the "and" is included.
2163
2164 The translation of the decimal point is also subject to variation
2165 (with "point", "dot", and "decimal" being the favorites).
2166 The named argument 'decimal' allows the
2167 programmer to how the decimal point should be rendered. Hence:
2168
2169         print scalar NUMWORDS("666.124.64.101", group=>3, decimal=>'dot')
2170
2171 prints "six sixty-six, dot, one twenty-four, dot, sixty-four, dot, one zero one"
2172 By default, the decimal point is rendered as "point".
2173
2174 C<NUMWORDS> also handles the ordinal forms of numbers. So:
2175
2176         print scalar NUMWORDS('1st');
2177         print scalar NUMWORDS('3rd');
2178         print scalar NUMWORDS('202nd');
2179         print scalar NUMWORDS('1000000th');
2180
2181 print:
2182
2183         first
2184         third
2185         two hundred and twenty-second
2186         one millionth
2187
2188 Two common idioms in this regard are:
2189
2190         print scalar NUMWORDS(ORD($number));
2191
2192 and:
2193
2194         print scalar ORD(NUMWORDS($number));
2195
2196 These are identical in effect, except when $number contains a decimal:
2197
2198         $number = 99.09;
2199         print scalar NUMWORDS(ORD($number));    # ninety-ninth point zero nine
2200         print scalar ORD(NUMWORDS($number));    # ninety-nine point zero ninth
2201
2202 Use whichever you feel is most appropriate.
2203
2204
2205 =head1 INTERPOLATING INFLECTIONS IN STRINGS
2206
2207 By far the commonest use of the inflection subroutines is to
2208 produce message strings for various purposes. For example:
2209
2210         print NUM($errors), PL_N(" error"), PL_V(" was"), " detected.\n";
2211         print PL_ADJ("This"), PL_N(" error"), PL_V(" was"), "fatal.\n"
2212                 if $severity > 1;
2213
2214 Unfortunately the need to separate each subroutine call detracts
2215 significantly from the readability of the resulting code. To ameliorate
2216 this problem, Lingua::EN::Inflect provides an exportable string-interpolating
2217 subroutine (C<inflect($)>), which recognizes calls to the various inflection
2218 subroutines within a string and interpolates them appropriately.
2219
2220 Using C<inflect> the previous example could be rewritten:
2221
2222         print inflect "NUM($errors) PL_N(error) PL_V(was) detected.\n";
2223         print inflect "PL_ADJ(This) PL_N(error) PL_V(was) fatal.\n"
2224                 if $severity > 1;
2225
2226 Note that C<inflect> also correctly handles calls to the C<NUM()> subroutine
2227 (whether interpolated or antecedent). The C<inflect()> subroutine has
2228 a related extra feature, in that it I<automatically> cancels any "default
2229 number" value before it returns its interpolated string. This means that
2230 calls to C<NUM()> which are embedded in an C<inflect()>-interpolated
2231 string do not "escape" and interfere with subsequent inflections.
2232
2233
2234 =head1 MODERN VS CLASSICAL INFLECTIONS
2235
2236 Certain words, mainly of Latin or Ancient Greek origin, can form
2237 plurals either using the standard English "-s" suffix, or with
2238 their original Latin or Greek inflections. For example:
2239
2240         PL("stigma")            # -> "stigmas" or "stigmata"
2241         PL("torus")             # -> "toruses" or "tori"
2242         PL("index")             # -> "indexes" or "indices"
2243         PL("millennium")        # -> "millenniums" or "millennia"
2244         PL("ganglion")          # -> "ganglions" or "ganglia"
2245         PL("octopus")           # -> "octopuses" or "octopodes"
2246
2247
2248 Lingua::EN::Inflect caters to such words by providing an
2249 "alternate state" of inflection known as "classical mode".
2250 By default, words are inflected using their contemporary English
2251 plurals, but if classical mode is invoked, the more traditional
2252 plural forms are returned instead.
2253
2254 The exportable subroutine C<classical()> controls this feature.
2255 If C<classical()> is called with no arguments, it unconditionally
2256 invokes classical mode. If it is called with a single argument, it
2257 turns all classical inflects on or off (depending on whether the argument is
2258 true or false). If called with two or more arguments, those arguments
2259 specify which aspects of classical behaviour are to be used.
2260
2261 Thus:
2262
2263         classical;                  # SWITCH ON CLASSICAL MODE
2264         print PL("formula");        # -> "formulae"
2265
2266         classical 0;                # SWITCH OFF CLASSICAL MODE
2267         print PL("formula");        # -> "formulas"
2268
2269         classical $cmode;           # CLASSICAL MODE IFF $cmode
2270         print PL("formula");        # -> "formulae" (IF $cmode)
2271                                     # -> "formulas" (OTHERWISE)
2272
2273         classical herd=>1;          # SWITCH ON CLASSICAL MODE FOR "HERD" NOUNS
2274         print PL("wilderbeest");    # -> "wilderbeest"
2275
2276         classical names=>1;         # SWITCH ON CLASSICAL MODE FOR NAMES
2277         print PL("sally");          # -> "sallies"
2278         print PL("Sally");          # -> "Sallys"
2279
2280 Note however that C<classical()> has no effect on the inflection of words which
2281 are now fully assimilated. Hence:
2282
2283         PL("forum")             # ALWAYS -> "forums"
2284         PL("criterion")         # ALWAYS -> "criteria"
2285
2286 LEI assumes that a capitalized word is a person's name. So it forms the
2287 plural according to the rules for names (which is that you don't
2288 inflect, you just add -s or -es). You can choose to turn that behaviour
2289 off (it's on by the default, even when the module isn't in classical
2290 mode) by calling C< classical(names=>0) >;
2291
2292 =head1 USER-DEFINED INFLECTIONS
2293
2294 =head2 Adding plurals at run-time
2295
2296 Lingua::EN::Inflect provides five exportable subroutines which allow
2297 the programmer to override the module's behaviour for specific cases:
2298
2299 =over 8
2300
2301 =item C<def_noun($$)>
2302
2303 The C<def_noun> subroutine takes a pair of string arguments: the singular and
2304 plural forms of the noun being specified. The singular form
2305 specifies a pattern to be interpolated (as C<m/^(?:$first_arg)$/i>).
2306 Any noun matching this pattern is then replaced by the string in the
2307 second argument. The second argument specifies a string which is
2308 interpolated after the match succeeds, and is then used as the plural
2309 form. For example:
2310
2311       def_noun  'cow'        => 'kine';
2312       def_noun  '(.+i)o'     => '$1i';
2313       def_noun  'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!';
2314
2315 Note that both arguments should usually be specified in single quotes,
2316 so that they are not interpolated when they are specified, but later (when
2317 words are compared to them). As indicated by the last example, care
2318 also needs to be taken with certain characters in the second argument,
2319 to ensure that they are not unintentionally interpolated during comparison.
2320
2321 The second argument string may also specify a second variant of the plural
2322 form, to be used when "classical" plurals have been requested. The beginning
2323 of the second variant is marked by a '|' character:
2324
2325       def_noun  'cow'        => 'cows|kine';
2326       def_noun  '(.+i)o'     => '$1os|$1i';
2327       def_noun  'spam(mer)?' => '\\$\\%\\@#\\$\\@#!!|varmints';
2328
2329 If no classical variant is given, the specified plural form is used in
2330 both normal and "classical" modes.
2331
2332 If the second argument is C<undef> instead of a string, then the
2333 current user definition for the first argument is removed, and the
2334 standard plural inflection(s) restored.
2335
2336 Note that in all cases, later plural definitions for a particular
2337 singular form replace earlier definitions of the same form. For example:
2338
2339       # FIRST, HIDE THE MODERN FORM....
2340       def_noun  'aviatrix' => 'aviatrices';
2341
2342       # LATER, HIDE THE CLASSICAL FORM...
2343       def_noun  'aviatrix' => 'aviatrixes';
2344
2345       # FINALLY, RESTORE THE DEFAULT BEHAVIOUR...
2346       def_noun  'aviatrix' => undef;
2347
2348
2349 Special care is also required when defining general patterns and
2350 associated specific exceptions: put the more specific cases I<after>
2351 the general pattern. For example:
2352
2353       def_noun  '(.+)us' => '$1i';      # EVERY "-us" TO "-i"
2354       def_noun  'bus'    => 'buses';    # EXCEPT FOR "bus"
2355
2356 This "try-most-recently-defined-first" approach to matching
2357 user-defined words is also used by C<def_verb>, C<def_a> and C<def_an>.
2358
2359
2360 =item C<def_verb($$$$$$)>
2361
2362 The C<def_verb> subroutine takes three pairs of string arguments (that is, six
2363 arguments in total), specifying the singular and plural forms of the three
2364 "persons" of verb. As with C<def_noun>, the singular forms are specifications of
2365 run-time-interpolated patterns, whilst the plural forms are specifications of
2366 (up to two) run-time-interpolated strings:
2367
2368        def_verb 'am'       => 'are',
2369                 'are'      => 'are|art",
2370                 'is'       => 'are';
2371
2372        def_verb 'have'     => 'have',
2373                 'have'     => 'have",
2374                 'ha(s|th)' => 'have';
2375
2376 Note that as with C<def_noun>, modern/classical variants of plurals
2377 may be separately specified, subsequent definitions replace previous
2378 ones, and C<undef>'ed plural forms revert to the standard behaviour.
2379
2380
2381 =item C<def_adj($$)>
2382
2383 The C<def_adj> subroutine takes a pair of string arguments, which specify
2384 the singular and plural forms of the adjective being defined.
2385 As with C<def_noun> and C<def_adj>, the singular forms are specifications of
2386 run-time-interpolated patterns, whilst the plural forms are specifications of
2387 (up to two) run-time-interpolated strings:
2388
2389        def_adj  'this'     => 'these',
2390        def_adj  'red'      => 'red|gules',
2391
2392 As previously, modern/classical variants of plurals
2393 may be separately specified, subsequent definitions replace previous
2394 ones, and C<undef>'ed plural forms revert to the standard behaviour.
2395
2396
2397 =item C<def_a($)> and C<def_an($)>
2398
2399 The C<def_a> and C<def_an> subroutines each take a single argument, which
2400 specifies a pattern. If a word passed to C<A()> or C<AN()> matches this
2401 pattern, it will be prefixed (unconditionally) with the corresponding indefinite
2402 article. For example:
2403
2404       def_a  'error';
2405       def_a  'in.+';
2406
2407       def_an 'mistake';
2408       def_an 'error';
2409
2410 As with the other C<def_...> subroutines, such redefinitions are sequential
2411 in effect so that, after the above example, "error" will be inflected with "an".
2412
2413 =back
2414
2415 =head2 The F<$HOME/.inflectrc> file
2416
2417 When it is imported, Lingua::EN::Inflect executes (as Perl code)
2418 the contents of any file named F<.inflectrc> which it finds in the
2419 in the directory where F<Lingua/EN/Inflect.pm> is installed,
2420 or in the current home directory (C<$ENV{HOME}>), or in both.
2421 Note that the code is executed within the Lingua::EN::Inflect
2422 namespace.
2423
2424 Hence the user or the local Perl guru can make appropriate calls to
2425 C<def_noun>, C<def_verb>, etc. in one of these F<.inflectrc> files, to
2426 permanently and universally modify the behaviour of the module. For example
2427
2428       > cat /usr/local/lib/perl5/Text/Inflect/.inflectrc
2429
2430       def_noun  "UNIX"  => "UN*X|UNICES";
2431
2432       def_verb  "teco"  => "teco",      # LITERALLY: "to edit with TECO"
2433                 "teco"  => "teco",
2434                 "tecos" => "teco";
2435
2436       def_a     "Euler.*";              # "Yewler" TURNS IN HIS GRAVE
2437
2438
2439 Note that calls to the C<def_...> subroutines from within a program
2440 will take precedence over the contents of the home directory
2441 F<.inflectrc> file, which in turn takes precedence over the system-wide
2442 F<.inflectrc> file.
2443
2444
2445 =head1 DIAGNOSTICS
2446
2447 On loading, if the Perl code in a F<.inflectrc> file is invalid
2448 (syntactically or otherwise), an appropriate fatal error is issued.
2449 A common problem is not ending the file with something that
2450 evaluates to true (as the five C<def_...> subroutines do).
2451
2452 Using the five C<def_...> subroutines directly in a program may also
2453 result in fatal diagnostics, if a (singular) pattern or an interpolated
2454 (plural) string is somehow invalid.
2455
2456 Specific diagnostics related to user-defined inflections are:
2457
2458 =over 8
2459
2460 =item C<"Bad user-defined singular pattern:\n\t %s">
2461
2462 The singular form of a user-defined noun or verb
2463 (as defined by a call to C<def_noun>, C<def_verb>, C<def_adj>,
2464 C<def_a> or C<def_an>) is not a valid Perl regular expression. The
2465 actual Perl error message is also given.
2466
2467 =item C<"Bad user-defined plural string: '%s'">
2468
2469 The plural form(s) of a user-defined noun or verb
2470 (as defined by a call to C<def_noun>, C<def_verb> or C<def_adj>)
2471 is not a valid Perl interpolated string (usually because it
2472 interpolates some undefined variable).
2473
2474 =item C<"Bad .inflectrc file (%s):\n %s">
2475
2476 Some other problem occurred in loading the named local
2477 or global F<.inflectrc> file. The Perl error message (including
2478 the line number) is also given.
2479
2480 =back
2481
2482 There are I<no> diagnosable run-time error conditions for the actual
2483 inflection subroutines, except C<NUMWORDS> and hence no run-time
2484 diagnostics. If the inflection subroutines are unable to form a plural
2485 via a user-definition or an inbuilt rule, they just "guess" the
2486 commonest English inflection: adding "-s" for nouns, removing "-s" for
2487 verbs, and no inflection for adjectives.
2488
2489 C<Lingua::EN::Inflect::NUMWORDS()> can C<die> with the following messages:
2490
2491 =over 8
2492
2493 =item C<"Bad grouping option: %s">
2494
2495 The optional argument to C<NUMWORDS()> wasn't 1, 2 or 3.
2496
2497 =item C<"Number out of range">
2498
2499 C<NUMWORDS()> was passed a number larger than
2500 999,999,999,999,999,999,999,999,999,999,999,999 (that is: nine hundred
2501 and ninety-nine decillion, nine hundred and ninety-nine nonillion, nine
2502 hundred and ninety-nine octillion, nine hundred and ninety-nine
2503 septillion, nine hundred and ninety-nine sextillion, nine hundred and
2504 ninety-nine quintillion, nine hundred and ninety-nine quadrillion, nine
2505 hundred and ninety-nine trillion, nine hundred and ninety-nine billion,
2506 nine hundred and ninety-nine million, nine hundred and ninety-nine
2507 thousand, nine hundred and ninety-nine :-)
2508
2509 The problem is that C<NUMWORDS> doesn't know any
2510 words for number components bigger than "decillion".
2511
2512
2513 =head1 OTHER ISSUES
2514
2515 =head2 2nd Person precedence
2516
2517 If a verb has identical 1st and 2nd person singular forms, but
2518 different 1st and 2nd person plural forms, then when its plural is
2519 constructed, the 2nd person plural form is always preferred.
2520
2521 The author is not currently aware of any such verbs in English, but is
2522 not quite arrogant enough to assume I<ipso facto> that none exist.
2523
2524
2525 =head2 Nominative precedence
2526
2527 The singular pronoun "it" presents a special problem because its plural form
2528 can vary, depending on its "case". For example:
2529
2530         It ate my homework       ->  They ate my homework
2531         It ate it                ->  They ate them
2532         I fed my homework to it  ->  I fed my homework to them
2533
2534 As a consequence of this ambiguity, C<PL()> or C<PL_N> have been implemented
2535 so that they always return the I<nominative> plural (that is, "they").
2536
2537 However, when asked for the plural of an unambiguously I<accusative>
2538 "it" (namely, C<PL("to it")>, C<PL_N("from it")>, C<PL("with it")>,
2539 etc.), both subroutines will correctly return the accusative plural
2540 ("to them", "from them", "with them", etc.)
2541
2542
2543 =head2 The plurality of zero
2544
2545 The rules governing the choice between:
2546
2547       There were no errors.
2548
2549 and
2550
2551       There was no error.
2552
2553 are complex and often depend more on I<intent> rather than I<content>.
2554 Hence it is infeasible to specify such rules algorithmically.
2555
2556 Therefore, Lingua::EN::Text contents itself with the following compromise: If
2557 the governing number is zero, inflections always return the plural form
2558 unless the appropriate "classical" inflection is in effect, in which case the
2559 singular form is always returned.
2560
2561 Thus, the sequence:
2562
2563       NUM(0);
2564       print inflect "There PL(was) NO(choice)";
2565
2566 produces "There were no choices", whereas:
2567
2568       classical 'zero';     # or: classical(zero=>1);
2569       NUM(0);
2570       print inflect "There PL(was) NO(choice)";
2571
2572 it will print "There was no choice".
2573
2574
2575 =head2 Homographs with heterogeneous plurals
2576
2577 Another context in which intent (and not content) sometimes determines
2578 plurality is where two distinct meanings of a word require different
2579 plurals. For example:
2580
2581       Three basses were stolen from the band's equipment trailer.
2582       Three bass were stolen from the band's aquarium.
2583
2584       I put the mice next to the cheese.
2585       I put the mouses next to the computers.
2586
2587       Several thoughts about leaving crossed my mind.
2588       Several thought about leaving across my lawn.
2589
2590 Lingua::EN::Inflect handles such words in two ways:
2591
2592 =over 8
2593
2594 =item *
2595
2596 If both meanings of the word are the I<same> part of speech (for
2597 example, "bass" is a noun in both sentences above), then one meaning
2598 is chosen as the "usual" meaning, and only that meaning's plural is
2599 ever returned by any of the inflection subroutines.
2600
2601 =item *
2602
2603 If each meaning of the word is a different part of speech (for
2604 example, "thought" is both a noun and a verb), then the noun's
2605 plural is returned by C<PL()> and C<PL_N()> and the verb's plural is
2606 returned only by C<PL_V()>.
2607
2608 =back
2609
2610 Such contexts are, fortunately, uncommon (particularly
2611 "same-part-of-speech" examples). An informal study of nearly 600
2612 "difficult plurals" indicates that C<PL()> can be relied upon to "get
2613 it right" about 98% of the time (although, of course, ichthyophilic
2614 guitarists or cyber-behaviouralists may experience higher rates of
2615 confusion).
2616
2617 If the choice of a particular "usual inflection" is considered
2618 inappropriate, it can always be reversed with a preliminary call
2619 to the corresponding C<def_...> subroutine.
2620
2621 =head1 NOTE
2622
2623 I'm not taking any further correspondence on:
2624
2625 =over
2626
2627 =item "octopi".
2628
2629 Despite the populist pandering of certain New World dictionaries, the
2630 plural is "octopuses" or (for the pendantic classicist) "octopodes". The
2631 suffix "-pus" is Greek, not Latin, so the plural is "-podes", not "pi".
2632
2633
2634 =item "virus".
2635
2636 Had no plural in Latin (possibly because it was a mass noun).
2637 The only plural is the Anglicized "viruses".
2638
2639 =back
2640
2641 =head1 AUTHORS
2642
2643 Damian Conway (damian@conway.org)
2644 Matthew Persico (ORD inflection)
2645
2646
2647 =head1 BUGS AND IRRITATIONS
2648
2649 The endless inconsistencies of English.
2650
2651 (I<Please> report words for which the correct plural or
2652 indefinite article is not formed, so that the reliability
2653 of Lingua::EN::Inflect can be improved.)
2654
2655
2656
2657 =head1 COPYRIGHT
2658
2659  Copyright (c) 1997-2000, Damian Conway. All Rights Reserved.
2660  This module is free software. It may be used, redistributed
2661      and/or modified under the same terms as Perl itself.