[p5sagit/p5-mst-13.2.git] / lib / legacy.pm

package legacy;

our $VERSION = '1.00';

$unicode8bit::hint_not_uni8bit = 0x00000800;

my %legacy_bundle = (
    "5.10" => [qw(unicode8bit)],
    "5.11" => [qw(unicode8bit)],
);

my %legacy = ( 'unicode8bit' => '0' );

=head1 NAME

legacy - Perl pragma to preserve legacy behaviors or enable new non-default behaviors

=head1 SYNOPSIS

 use legacy ':5.10'; # Keeps semantics the same as in perl 5.10

 use legacy qw(unicode8bit);

 no legacy;

 no legacy qw(unicode8bit);

=head1 DESCRIPTION

Some programs may rely on behaviors that for others are problematic or
even wrong.  A new version of Perl may change behaviors from past ones,
and when it is viewed that the old way of doing things may be required
to still be supported, the new behavior will be able to be turned off by using
this pragma.

Additionally, a new behavior may be supported in a new version of Perl, but
for whatever reason the default remains the old one.  This pragma can enable
the new behavior.

Like other pragmas (C<use feature>, for example), C<use legacy qw(foo)> will
only make the legacy behavior for "foo" available from that point to the end of
the enclosing block.

=head2 B<use legacy>

Preserve the old way of doing things when a new version of Perl is
released that would otherwise change the behavior.

The one current possibility is:

=head3 unicode8bit

Use legacy semantics for the 128 characters on ASCII systems that have the 8th
bit set.  (See L</EBCDIC platforms> below for EBCDIC systems.)  Unless
C<S<use locale>> is specified, or the scalar containing such a character is
known by Perl to be encoded in UTF8, the semantics are essentially that the
characters have an ordinal number, and that's it.  They are caseless, and
aren't anything: they're not controls, not letters, not punctuation, ..., not
anything.

This behavior stems from when Perl did not support Unicode, and ASCII was the
only known character set outside of C<S<use locale>>.  In order to not
possibly break pre-Unicode programs, these characters have retained their old
non-meanings, except when it is clear to Perl that Unicode is what is meant,
for example by calling utf8::upgrade() on a scalar, or if the scalar also
contains characters that are only available in Unicode.  Then these 128
characters take on their Unicode meanings.

The problem with this behavior is that a scalar that encodes these characters
has a different meaning depending on if it is stored as utf8 or not.
In general, the internal storage method should not affect the
external behavior.

The behavior is known to have effects on these areas:

=over 4

=item *

Changing the case of a scalar, that is, using C<uc()>, C<ucfirst()>, C<lc()>,
and C<lcfirst()>, or C<\L>, C<\U>, C<\u> and C<\l> in regular expression
substitutions.

=item *

Using caseless (C</i>) regular expression matching

=item *

Matching a number of properties in regular expressions, such as C<\w>

=item *

User-defined case change mappings.  You can create a C<ToUpper()> function, for
example, which overrides Perl's built-in case mappings.  The scalar must be
encoded in utf8 for your function to actually be invoked.

=back

B<This lack of semantics for these characters is currently the default,>
outside of C<use locale>.  See below for EBCDIC.
To turn on B<case changing semantics only> for these characters, use
C<S<no legacy>>.
The other legacy behaviors regarding these characters are currently
unaffected by this pragma.

=head4 EBCDIC platforms

On EBCDIC platforms, the situation is somewhat different.  The legacy
semantics are whatever the underlying semantics of the native C language
library are.  Each of the three EBCDIC encodings currently known by Perl is an
isomorph of the Latin-1 character set.  That means every character in Latin-1
has a corresponding EBCDIC equivalent, and vice-versa.  Specifying C<S<no
legacy>> currently makes sure that all EBCDIC characters have the same
B<casing only> semantics as their corresponding Latin-1 characters.

=head2 B<no legacy>

Turn on a new behavior in a version of Perl that understands
it but has it turned off by default.  For example, C<no legacy 'foo'> turns on
behavior C<foo> in the lexical scope of the pragma.  C<no legacy>
without any modifier turns on all new behaviors known to the pragma.

=head1 LEGACY BUNDLES

It's possible to turn off all new behaviors past a given release by
using a I<legacy bundle>, which is the name of the release prefixed with
a colon, to distinguish it from an individual legacy behavior.

Specifying sub-versions such as the C<0> in C<5.10.0> in legacy bundles has
no effect: legacy bundles are guaranteed to be the same for all sub-versions.

Legacy bundles are not allowed with C<no legacy>.

=cut

sub import {
    my $class = shift;
    if (@_ == 0) {
        croak("No legacy behaviors specified");
    }
    while (@_) {
        my $name = shift(@_);
        if (substr($name, 0, 1) eq ":") {
            my $v = substr($name, 1);
            if (!exists $legacy_bundle{$v}) {
                $v =~ s/^([0-9]+)\.([0-9]+).[0-9]+$/$1.$2/;
                if (!exists $legacy_bundle{$v}) {
                    unknown_legacy_bundle(substr($name, 1));
                }
            }
            unshift @_, @{$legacy_bundle{$v}};
            next;
        }
        if (!exists $legacy{$name}) {
            unknown_legacy($name);
        }
        $^H |= $unicode8bit::hint_not_uni8bit;   # The only valid thing as of yet
    }
}


sub unimport {
    my $class = shift;

    # A bare C<no legacy> should disable *all* legacy behaviors
    if (!@_) {
        unshift @_, keys(%legacy);
    }

    while (@_) {
        my $name = shift;
        if (substr($name, 0, 1) eq ":") {
            croak(sprintf('Legacy bundles (%s) are not allowed in "no legacy"',
                $name));
        }
        if (!exists($legacy{$name})) {
            unknown_legacy($name);
        }
        else {
            $^H &= ~ $unicode8bit::hint_not_uni8bit; # The only valid thing now
        }
    }
}

sub unknown_legacy {
    my $legacy = shift;
    croak(sprintf('Legacy "%s" is not supported by Perl %vd', $legacy, $^V));
}

sub unknown_legacy_bundle {
    my $legacy = shift;
    croak(sprintf('Legacy bundle "%s" is not supported by Perl %vd',
        $legacy, $^V));
}

sub croak {
    require Carp;
    Carp::croak(@_);
}

1;
Commit	Line	Data
66060160	1	package legacy;
	2
	3	our $VERSION = '1.00';
	4
61fc5122	5	$unicode8bit::hint_not_uni8bit = 0x00000800;
66060160	6
	7	my %legacy_bundle = (
	8	"5.10" => [qw(unicode8bit)],
	9	"5.11" => [qw(unicode8bit)],
	10	);
	11
	12	my %legacy = ( 'unicode8bit' => '0' );
	13
	14	=head1 NAME
	15
a994735a	16	legacy - Perl pragma to preserve legacy behaviors or enable new non-default behaviors
66060160	17
	18	=head1 SYNOPSIS
	19
	20	use legacy ':5.10'; # Keeps semantics the same as in perl 5.10
	21
00f254e2	22	use legacy qw(unicode8bit);
66060160	23
00f254e2	24	no legacy;
66060160	25
00f254e2	26	no legacy qw(unicode8bit);
66060160	27
	28	=head1 DESCRIPTION
	29
	30	Some programs may rely on behaviors that for others are problematic or
	31	even wrong. A new version of Perl may change behaviors from past ones,
	32	and when it is viewed that the old way of doing things may be required
00f254e2	33	to still be supported, the new behavior will be able to be turned off by using
00f254e2	34	this pragma.
66060160	35
	36	Additionally, a new behavior may be supported in a new version of Perl, but
	37	for whatever reason the default remains the old one. This pragma can enable
	38	the new behavior.
	39
	40	Like other pragmas (C<use feature>, for example), C<use legacy qw(foo)> will
	41	only make the legacy behavior for "foo" available from that point to the end of
	42	the enclosing block.
	43
66060160	44	=head2 B<use legacy>
	45
	46	Preserve the old way of doing things when a new version of Perl is
00f254e2	47	released that would otherwise change the behavior.
	48
	49	The one current possibility is:
	50
	51	=head3 unicode8bit
	52
00f254e2	53	Use legacy semantics for the 128 characters on ASCII systems that have the 8th
	54	bit set. (See L</EBCDIC platforms> below for EBCDIC systems.) Unless
	55	C<S<use locale>> is specified, or the scalar containing such a character is
	56	known by Perl to be encoded in UTF8, the semantics are essentially that the
	57	characters have an ordinal number, and that's it. They are caseless, and
	58	aren't anything: they're not controls, not letters, not punctuation, ..., not
	59	anything.
	60
	61	This behavior stems from when Perl did not support Unicode, and ASCII was the
	62	only known character set outside of C<S<use locale>>. In order to not
e1e0053f	63	possibly break pre-Unicode programs, these characters have retained their old
00f254e2	64	non-meanings, except when it is clear to Perl that Unicode is what is meant,
a994735a	65	for example by calling utf8::upgrade() on a scalar, or if the scalar also
00f254e2	66	contains characters that are only available in Unicode. Then these 128
	67	characters take on their Unicode meanings.
	68
	69	The problem with this behavior is that a scalar that encodes these characters
	70	has a different meaning depending on if it is stored as utf8 or not.
	71	In general, the internal storage method should not affect the
	72	external behavior.
	73
	74	The behavior is known to have effects on these areas:
	75
	76	=over 4
	77
a994735a	78	=item *
00f254e2	79
a994735a	80	Changing the case of a scalar, that is, using C<uc()>, C<ucfirst()>, C<lc()>,
e1e0053f	81	and C<lcfirst()>, or C<\L>, C<\U>, C<\u> and C<\l> in regular expression
e1e0053f	82	substitutions.
00f254e2	83
a994735a	84	=item *
00f254e2	85
	86	Using caseless (C</i>) regular expression matching
	87
a994735a	88	=item *
00f254e2	89
	90	Matching a number of properties in regular expressions, such as C<\w>
	91
a994735a	92	=item *
00f254e2	93
	94	User-defined case change mappings. You can create a C<ToUpper()> function, for
	95	example, which overrides Perl's built-in case mappings. The scalar must be
	96	encoded in utf8 for your function to actually be invoked.
	97
	98	=back
	99
	100	B<This lack of semantics for these characters is currently the default,>
	101	outside of C<use locale>. See below for EBCDIC.
	102	To turn on B<case changing semantics only> for these characters, use
	103	C<S<no legacy>>.
	104	The other legacy behaviors regarding these characters are currently
	105	unaffected by this pragma.
	106
	107	=head4 EBCDIC platforms
	108
	109	On EBCDIC platforms, the situation is somewhat different. The legacy
	110	semantics are whatever the underlying semantics of the native C language
	111	library are. Each of the three EBCDIC encodings currently known by Perl is an
	112	isomorph of the Latin-1 character set. That means every character in Latin-1
	113	has a corresponding EBCDIC equivalent, and vice-versa. Specifying C<S<no
	114	legacy>> currently makes sure that all EBCDIC characters have the same
	115	B<casing only> semantics as their corresponding Latin-1 characters.
66060160	116
	117	=head2 B<no legacy>
	118
	119	Turn on a new behavior in a version of Perl that understands
	120	it but has it turned off by default. For example, C<no legacy 'foo'> turns on
00f254e2	121	behavior C<foo> in the lexical scope of the pragma. C<no legacy>
00f254e2	122	without any modifier turns on all new behaviors known to the pragma.
66060160	123
	124	=head1 LEGACY BUNDLES
	125
00f254e2	126	It's possible to turn off all new behaviors past a given release by
66060160	127	using a I<legacy bundle>, which is the name of the release prefixed with
	128	a colon, to distinguish it from an individual legacy behavior.
	129
	130	Specifying sub-versions such as the C<0> in C<5.10.0> in legacy bundles has
	131	no effect: legacy bundles are guaranteed to be the same for all sub-versions.
	132
e1e0053f	133	Legacy bundles are not allowed with C<no legacy>.
66060160	134
	135	=cut
	136
	137	sub import {
	138	my $class = shift;
	139	if (@_ == 0) {
	140	croak("No legacy behaviors specified");
	141	}
	142	while (@_) {
	143	my $name = shift(@_);
	144	if (substr($name, 0, 1) eq ":") {
	145	my $v = substr($name, 1);
	146	if (!exists $legacy_bundle{$v}) {
	147	$v =~ s/^([0-9]+)\.([0-9]+).[0-9]+$/$1.$2/;
	148	if (!exists $legacy_bundle{$v}) {
	149	unknown_legacy_bundle(substr($name, 1));
	150	}
	151	}
	152	unshift @_, @{$legacy_bundle{$v}};
	153	next;
	154	}
	155	if (!exists $legacy{$name}) {
	156	unknown_legacy($name);
	157	}
61fc5122	158	$^H \|= $unicode8bit::hint_not_uni8bit; # The only valid thing as of yet
66060160	159	}
	160	}
	161
	162
	163	sub unimport {
	164	my $class = shift;
	165
	166	# A bare C<no legacy> should disable all legacy behaviors
	167	if (!@_) {
	168	unshift @_, keys(%legacy);
	169	}
	170
	171	while (@_) {
	172	my $name = shift;
	173	if (substr($name, 0, 1) eq ":") {
	174	croak(sprintf('Legacy bundles (%s) are not allowed in "no legacy"',
	175	$name));
	176	}
	177	if (!exists($legacy{$name})) {
	178	unknown_legacy($name);
	179	}
	180	else {
61fc5122	181	$^H &= ~ $unicode8bit::hint_not_uni8bit; # The only valid thing now
66060160	182	}
	183	}
	184	}
	185
	186	sub unknown_legacy {
	187	my $legacy = shift;
	188	croak(sprintf('Legacy "%s" is not supported by Perl %vd', $legacy, $^V));
	189	}
	190
	191	sub unknown_legacy_bundle {
	192	my $legacy = shift;
	193	croak(sprintf('Legacy bundle "%s" is not supported by Perl %vd',
	194	$legacy, $^V));
	195	}
	196
	197	sub croak {
	198	require Carp;
	199	Carp::croak(@_);
	200	}
	201
	202	1;