[p5sagit/p5-mst-13.2.git] / lib / legacy.pm

package legacy;

our $VERSION = '1.00';

$unicode8bit::hint_not_uni8bit = 0x00000800;

my %legacy_bundle = (
    "5.10" => [qw(unicode8bit)],
    "5.11" => [qw(unicode8bit)],
);

my %legacy = ( 'unicode8bit' => '0' );

=head1 NAME

legacy - Perl pragma to preserve legacy behaviors or enable new non-default behaviors

=head1 SYNOPSIS

 use legacy ':5.10'; # Keeps semantics the same as in perl 5.10

 use legacy qw(unicode8bit);

 no legacy;

 no legacy qw(unicode8bit);

=head1 DESCRIPTION

Some programs may rely on behaviors that for others are problematic or
even wrong.  A new version of Perl may change behaviors from past ones,
and when it is viewed that the old way of doing things may be required
to still be supported, the new behavior will be able to be turned off by using
this pragma.

Additionally, a new behavior may be supported in a new version of Perl, but
for whatever reason the default remains the old one.  This pragma can enable
the new behavior.

Like other pragmas (C<use feature>, for example), C<use legacy qw(foo)> will
only make the legacy behavior for "foo" available from that point to the end of
the enclosing block.

=head2 B<use legacy>

Preserve the old way of doing things when a new version of Perl is
released that would otherwise change the behavior.

The one current possibility is:

=head3 unicode8bit

THIS IS SUBJECT TO CHANGE

Use legacy semantics for the 128 characters on ASCII systems that have the 8th
bit set.  (See L</EBCDIC platforms> below for EBCDIC systems.)  Unless
C<S<use locale>> is specified, or the scalar containing such a character is
known by Perl to be encoded in UTF8, the semantics are essentially that the
characters have an ordinal number, and that's it.  They are caseless, and
aren't anything: they're not controls, not letters, not punctuation, ..., not
anything.

This behavior stems from when Perl did not support Unicode, and ASCII was the
only known character set outside of C<S<use locale>>.  In order to not
possibly break pre_Unicode programs, these characters have retained their old
non-meanings, except when it is clear to Perl that Unicode is what is meant,
for example by calling utf8::upgrade() on a scalar, or if the scalar also
contains characters that are only available in Unicode.  Then these 128
characters take on their Unicode meanings.

The problem with this behavior is that a scalar that encodes these characters
has a different meaning depending on if it is stored as utf8 or not.
In general, the internal storage method should not affect the
external behavior.

The behavior is known to have effects on these areas:

=over 4

=item *

Changing the case of a scalar, that is, using C<uc()>, C<ucfirst()>, C<lc()>,
and C<lcfirst()>, or C<\L>, C<\U>, C<\u> and C<\l> in regular expression substitutions.

=item *

Using caseless (C</i>) regular expression matching

=item *

Matching a number of properties in regular expressions, such as C<\w>

=item *

User-defined case change mappings.  You can create a C<ToUpper()> function, for
example, which overrides Perl's built-in case mappings.  The scalar must be
encoded in utf8 for your function to actually be invoked.

=back

B<This lack of semantics for these characters is currently the default,>
outside of C<use locale>.  See below for EBCDIC.
To turn on B<case changing semantics only> for these characters, use
C<S<no legacy>>.
The other legacy behaviors regarding these characters are currently
unaffected by this pragma.

=head4 EBCDIC platforms

On EBCDIC platforms, the situation is somewhat different.  The legacy
semantics are whatever the underlying semantics of the native C language
library are.  Each of the three EBCDIC encodings currently known by Perl is an
isomorph of the Latin-1 character set.  That means every character in Latin-1
has a corresponding EBCDIC equivalent, and vice-versa.  Specifying C<S<no
legacy>> currently makes sure that all EBCDIC characters have the same
B<casing only> semantics as their corresponding Latin-1 characters.

=head2 B<no legacy>

Turn on a new behavior in a version of Perl that understands
it but has it turned off by default.  For example, C<no legacy 'foo'> turns on
behavior C<foo> in the lexical scope of the pragma.  C<no legacy>
without any modifier turns on all new behaviors known to the pragma.

=head1 LEGACY BUNDLES

It's possible to turn off all new behaviors past a given release by
using a I<legacy bundle>, which is the name of the release prefixed with
a colon, to distinguish it from an individual legacy behavior.

Specifying sub-versions such as the C<0> in C<5.10.0> in legacy bundles has
no effect: legacy bundles are guaranteed to be the same for all sub-versions.

Legacy bundles are not allowed with C<no legacy>

=cut

sub import {
    my $class = shift;
    if (@_ == 0) {
        croak("No legacy behaviors specified");
    }
    while (@_) {
        my $name = shift(@_);
        if (substr($name, 0, 1) eq ":") {
            my $v = substr($name, 1);
            if (!exists $legacy_bundle{$v}) {
                $v =~ s/^([0-9]+)\.([0-9]+).[0-9]+$/$1.$2/;
                if (!exists $legacy_bundle{$v}) {
                    unknown_legacy_bundle(substr($name, 1));
                }
            }
            unshift @_, @{$legacy_bundle{$v}};
            next;
        }
        if (!exists $legacy{$name}) {
            unknown_legacy($name);
        }
        $^H |= $unicode8bit::hint_not_uni8bit;   # The only valid thing as of yet
    }
}


sub unimport {
    my $class = shift;

    # A bare C<no legacy> should disable *all* legacy behaviors
    if (!@_) {
        unshift @_, keys(%legacy);
    }

    while (@_) {
        my $name = shift;
        if (substr($name, 0, 1) eq ":") {
            croak(sprintf('Legacy bundles (%s) are not allowed in "no legacy"',
                $name));
        }
        if (!exists($legacy{$name})) {
            unknown_legacy($name);
        }
        else {
            $^H &= ~ $unicode8bit::hint_not_uni8bit; # The only valid thing now
        }
    }
}

sub unknown_legacy {
    my $legacy = shift;
    croak(sprintf('Legacy "%s" is not supported by Perl %vd', $legacy, $^V));
}

sub unknown_legacy_bundle {
    my $legacy = shift;
    croak(sprintf('Legacy bundle "%s" is not supported by Perl %vd',
        $legacy, $^V));
}

sub croak {
    require Carp;
    Carp::croak(@_);
}

1;
Commit	Line	Data
66060160	1	package legacy;
	2
	3	our $VERSION = '1.00';
	4
61fc5122	5	$unicode8bit::hint_not_uni8bit = 0x00000800;
66060160	6
	7	my %legacy_bundle = (
	8	"5.10" => [qw(unicode8bit)],
	9	"5.11" => [qw(unicode8bit)],
	10	);
	11
	12	my %legacy = ( 'unicode8bit' => '0' );
	13
	14	=head1 NAME
	15
a994735a	16	legacy - Perl pragma to preserve legacy behaviors or enable new non-default behaviors
66060160	17
	18	=head1 SYNOPSIS
	19
	20	use legacy ':5.10'; # Keeps semantics the same as in perl 5.10
	21
00f254e2	22	use legacy qw(unicode8bit);
66060160	23
00f254e2	24	no legacy;
66060160	25
00f254e2	26	no legacy qw(unicode8bit);
66060160	27
	28	=head1 DESCRIPTION
	29
	30	Some programs may rely on behaviors that for others are problematic or
	31	even wrong. A new version of Perl may change behaviors from past ones,
	32	and when it is viewed that the old way of doing things may be required
00f254e2	33	to still be supported, the new behavior will be able to be turned off by using
00f254e2	34	this pragma.
66060160	35
	36	Additionally, a new behavior may be supported in a new version of Perl, but
	37	for whatever reason the default remains the old one. This pragma can enable
	38	the new behavior.
	39
	40	Like other pragmas (C<use feature>, for example), C<use legacy qw(foo)> will
	41	only make the legacy behavior for "foo" available from that point to the end of
	42	the enclosing block.
	43
66060160	44	=head2 B<use legacy>
	45
	46	Preserve the old way of doing things when a new version of Perl is
00f254e2	47	released that would otherwise change the behavior.
	48
	49	The one current possibility is:
	50
	51	=head3 unicode8bit
	52
	53	THIS IS SUBJECT TO CHANGE
	54
	55	Use legacy semantics for the 128 characters on ASCII systems that have the 8th
	56	bit set. (See L</EBCDIC platforms> below for EBCDIC systems.) Unless
	57	C<S<use locale>> is specified, or the scalar containing such a character is
	58	known by Perl to be encoded in UTF8, the semantics are essentially that the
	59	characters have an ordinal number, and that's it. They are caseless, and
	60	aren't anything: they're not controls, not letters, not punctuation, ..., not
	61	anything.
	62
	63	This behavior stems from when Perl did not support Unicode, and ASCII was the
	64	only known character set outside of C<S<use locale>>. In order to not
	65	possibly break pre_Unicode programs, these characters have retained their old
	66	non-meanings, except when it is clear to Perl that Unicode is what is meant,
a994735a	67	for example by calling utf8::upgrade() on a scalar, or if the scalar also
00f254e2	68	contains characters that are only available in Unicode. Then these 128
	69	characters take on their Unicode meanings.
	70
	71	The problem with this behavior is that a scalar that encodes these characters
	72	has a different meaning depending on if it is stored as utf8 or not.
	73	In general, the internal storage method should not affect the
	74	external behavior.
	75
	76	The behavior is known to have effects on these areas:
	77
	78	=over 4
	79
a994735a	80	=item *
00f254e2	81
a994735a	82	Changing the case of a scalar, that is, using C<uc()>, C<ucfirst()>, C<lc()>,
00f254e2	83	and C<lcfirst()>, or C<\L>, C<\U>, C<\u> and C<\l> in regular expression substitutions.
00f254e2	84
a994735a	85	=item *
00f254e2	86
	87	Using caseless (C</i>) regular expression matching
	88
a994735a	89	=item *
00f254e2	90
	91	Matching a number of properties in regular expressions, such as C<\w>
	92
a994735a	93	=item *
00f254e2	94
	95	User-defined case change mappings. You can create a C<ToUpper()> function, for
	96	example, which overrides Perl's built-in case mappings. The scalar must be
	97	encoded in utf8 for your function to actually be invoked.
	98
	99	=back
	100
	101	B<This lack of semantics for these characters is currently the default,>
	102	outside of C<use locale>. See below for EBCDIC.
	103	To turn on B<case changing semantics only> for these characters, use
	104	C<S<no legacy>>.
	105	The other legacy behaviors regarding these characters are currently
	106	unaffected by this pragma.
	107
	108	=head4 EBCDIC platforms
	109
	110	On EBCDIC platforms, the situation is somewhat different. The legacy
	111	semantics are whatever the underlying semantics of the native C language
	112	library are. Each of the three EBCDIC encodings currently known by Perl is an
	113	isomorph of the Latin-1 character set. That means every character in Latin-1
	114	has a corresponding EBCDIC equivalent, and vice-versa. Specifying C<S<no
	115	legacy>> currently makes sure that all EBCDIC characters have the same
	116	B<casing only> semantics as their corresponding Latin-1 characters.
66060160	117
	118	=head2 B<no legacy>
	119
	120	Turn on a new behavior in a version of Perl that understands
	121	it but has it turned off by default. For example, C<no legacy 'foo'> turns on
00f254e2	122	behavior C<foo> in the lexical scope of the pragma. C<no legacy>
00f254e2	123	without any modifier turns on all new behaviors known to the pragma.
66060160	124
	125	=head1 LEGACY BUNDLES
	126
00f254e2	127	It's possible to turn off all new behaviors past a given release by
66060160	128	using a I<legacy bundle>, which is the name of the release prefixed with
	129	a colon, to distinguish it from an individual legacy behavior.
	130
	131	Specifying sub-versions such as the C<0> in C<5.10.0> in legacy bundles has
	132	no effect: legacy bundles are guaranteed to be the same for all sub-versions.
	133
	134	Legacy bundles are not allowed with C<no legacy>
	135
	136	=cut
	137
	138	sub import {
	139	my $class = shift;
	140	if (@_ == 0) {
	141	croak("No legacy behaviors specified");
	142	}
	143	while (@_) {
	144	my $name = shift(@_);
	145	if (substr($name, 0, 1) eq ":") {
	146	my $v = substr($name, 1);
	147	if (!exists $legacy_bundle{$v}) {
	148	$v =~ s/^([0-9]+)\.([0-9]+).[0-9]+$/$1.$2/;
	149	if (!exists $legacy_bundle{$v}) {
	150	unknown_legacy_bundle(substr($name, 1));
	151	}
	152	}
	153	unshift @_, @{$legacy_bundle{$v}};
	154	next;
	155	}
	156	if (!exists $legacy{$name}) {
	157	unknown_legacy($name);
	158	}
61fc5122	159	$^H \|= $unicode8bit::hint_not_uni8bit; # The only valid thing as of yet
66060160	160	}
	161	}
	162
	163
	164	sub unimport {
	165	my $class = shift;
	166
	167	# A bare C<no legacy> should disable all legacy behaviors
	168	if (!@_) {
	169	unshift @_, keys(%legacy);
	170	}
	171
	172	while (@_) {
	173	my $name = shift;
	174	if (substr($name, 0, 1) eq ":") {
	175	croak(sprintf('Legacy bundles (%s) are not allowed in "no legacy"',
	176	$name));
	177	}
	178	if (!exists($legacy{$name})) {
	179	unknown_legacy($name);
	180	}
	181	else {
61fc5122	182	$^H &= ~ $unicode8bit::hint_not_uni8bit; # The only valid thing now
66060160	183	}
	184	}
	185	}
	186
	187	sub unknown_legacy {
	188	my $legacy = shift;
	189	croak(sprintf('Legacy "%s" is not supported by Perl %vd', $legacy, $^V));
	190	}
	191
	192	sub unknown_legacy_bundle {
	193	my $legacy = shift;
	194	croak(sprintf('Legacy bundle "%s" is not supported by Perl %vd',
	195	$legacy, $^V));
	196	}
	197
	198	sub croak {
	199	require Carp;
	200	Carp::croak(@_);
	201	}
	202
	203	1;