[p5sagit/p5-mst-13.2.git] / lib / legacy.pm

package legacy;

our $VERSION = '1.00';

$unicode8bit::hint_uni8bit = 0x00000800;

my %legacy_bundle = (
    "5.10" => [qw(unicode8bit)],
    "5.11" => [qw(unicode8bit)],
);

my %legacy = ( 'unicode8bit' => '0' );

=head1 NAME

legacy - Perl pragma to preserve legacy behaviors or enable new non-default
behaviors

=head1 SYNOPSIS

 use legacy ':5.10'; # Keeps semantics the same as in perl 5.10

 use legacy qw(unicode8bit);

 no legacy;

 no legacy qw(unicode8bit);

=head1 DESCRIPTION

Some programs may rely on behaviors that for others are problematic or
even wrong.  A new version of Perl may change behaviors from past ones,
and when it is viewed that the old way of doing things may be required
to still be supported, the new behavior will be able to be turned off by using
this pragma.

Additionally, a new behavior may be supported in a new version of Perl, but
for whatever reason the default remains the old one.  This pragma can enable
the new behavior.

Like other pragmas (C<use feature>, for example), C<use legacy qw(foo)> will
only make the legacy behavior for "foo" available from that point to the end of
the enclosing block.

=head2 B<use legacy>

Preserve the old way of doing things when a new version of Perl is
released that would otherwise change the behavior.

The one current possibility is:

=head3 unicode8bit

THIS IS SUBJECT TO CHANGE

Use legacy semantics for the 128 characters on ASCII systems that have the 8th
bit set.  (See L</EBCDIC platforms> below for EBCDIC systems.)  Unless
C<S<use locale>> is specified, or the scalar containing such a character is
known by Perl to be encoded in UTF8, the semantics are essentially that the
characters have an ordinal number, and that's it.  They are caseless, and
aren't anything: they're not controls, not letters, not punctuation, ..., not
anything.

This behavior stems from when Perl did not support Unicode, and ASCII was the
only known character set outside of C<S<use locale>>.  In order to not
possibly break pre_Unicode programs, these characters have retained their old
non-meanings, except when it is clear to Perl that Unicode is what is meant,
for example by calling utf::upgrade() on a scalar, or if the scalar also
contains characters that are only available in Unicode.  Then these 128
characters take on their Unicode meanings.

The problem with this behavior is that a scalar that encodes these characters
has a different meaning depending on if it is stored as utf8 or not.
In general, the internal storage method should not affect the
external behavior.

The behavior is known to have effects on these areas:

=over 4

=item

Changing the case of a scalar, that is, using C<uc()>,
C<ucfirst()>,
C<lc()>,
and C<lcfirst()>, or C<\L>, C<\U>, C<\u> and C<\l> in regular expression substitutions.

=item

Using caseless (C</i>) regular expression matching

=item

Matching a number of properties in regular expressions, such as C<\w>

=item

User-defined case change mappings.  You can create a C<ToUpper()> function, for
example, which overrides Perl's built-in case mappings.  The scalar must be
encoded in utf8 for your function to actually be invoked.

=back

B<This lack of semantics for these characters is currently the default,>
outside of C<use locale>.  See below for EBCDIC.
To turn on B<case changing semantics only> for these characters, use
C<S<no legacy>>.
The other legacy behaviors regarding these characters are currently
unaffected by this pragma.

=head4 EBCDIC platforms

On EBCDIC platforms, the situation is somewhat different.  The legacy
semantics are whatever the underlying semantics of the native C language
library are.  Each of the three EBCDIC encodings currently known by Perl is an
isomorph of the Latin-1 character set.  That means every character in Latin-1
has a corresponding EBCDIC equivalent, and vice-versa.  Specifying C<S<no
legacy>> currently makes sure that all EBCDIC characters have the same
B<casing only> semantics as their corresponding Latin-1 characters.

=head2 B<no legacy>

Turn on a new behavior in a version of Perl that understands
it but has it turned off by default.  For example, C<no legacy 'foo'> turns on
behavior C<foo> in the lexical scope of the pragma.  C<no legacy>
without any modifier turns on all new behaviors known to the pragma.

=head1 LEGACY BUNDLES

It's possible to turn off all new behaviors past a given release by
using a I<legacy bundle>, which is the name of the release prefixed with
a colon, to distinguish it from an individual legacy behavior.

Specifying sub-versions such as the C<0> in C<5.10.0> in legacy bundles has
no effect: legacy bundles are guaranteed to be the same for all sub-versions.

Legacy bundles are not allowed with C<no legacy>

=cut

sub import {
    my $class = shift;
    if (@_ == 0) {
        croak("No legacy behaviors specified");
    }
    while (@_) {
        my $name = shift(@_);
        if (substr($name, 0, 1) eq ":") {
            my $v = substr($name, 1);
            if (!exists $legacy_bundle{$v}) {
                $v =~ s/^([0-9]+)\.([0-9]+).[0-9]+$/$1.$2/;
                if (!exists $legacy_bundle{$v}) {
                    unknown_legacy_bundle(substr($name, 1));
                }
            }
            unshift @_, @{$legacy_bundle{$v}};
            next;
        }
        if (!exists $legacy{$name}) {
            unknown_legacy($name);
        }
        $^H &= ~$unicode8bit::hint_uni8bit;    # The only valid thing as of yet
    }
}


sub unimport {
    my $class = shift;

    # A bare C<no legacy> should disable *all* legacy behaviors
    if (!@_) {
        unshift @_, keys(%legacy);
    }

    while (@_) {
        my $name = shift;
        if (substr($name, 0, 1) eq ":") {
            croak(sprintf('Legacy bundles (%s) are not allowed in "no legacy"',
                $name));
        }
        if (!exists($legacy{$name})) {
            unknown_legacy($name);
        }
        else {
            $^H |= $unicode8bit::hint_uni8bit; # The only valid thing as of yet
        }
    }
}

sub unknown_legacy {
    my $legacy = shift;
    croak(sprintf('Legacy "%s" is not supported by Perl %vd', $legacy, $^V));
}

sub unknown_legacy_bundle {
    my $legacy = shift;
    croak(sprintf('Legacy bundle "%s" is not supported by Perl %vd',
        $legacy, $^V));
}

sub croak {
    require Carp;
    Carp::croak(@_);
}

1;
Commit	Line	Data
66060160	1	package legacy;
	2
	3	our $VERSION = '1.00';
	4
00f254e2	5	$unicode8bit::hint_uni8bit = 0x00000800;
66060160	6
	7	my %legacy_bundle = (
	8	"5.10" => [qw(unicode8bit)],
	9	"5.11" => [qw(unicode8bit)],
	10	);
	11
	12	my %legacy = ( 'unicode8bit' => '0' );
	13
	14	=head1 NAME
	15
	16	legacy - Perl pragma to preserve legacy behaviors or enable new non-default
	17	behaviors
	18
	19	=head1 SYNOPSIS
	20
	21	use legacy ':5.10'; # Keeps semantics the same as in perl 5.10
	22
00f254e2	23	use legacy qw(unicode8bit);
66060160	24
00f254e2	25	no legacy;
66060160	26
00f254e2	27	no legacy qw(unicode8bit);
66060160	28
	29	=head1 DESCRIPTION
	30
	31	Some programs may rely on behaviors that for others are problematic or
	32	even wrong. A new version of Perl may change behaviors from past ones,
	33	and when it is viewed that the old way of doing things may be required
00f254e2	34	to still be supported, the new behavior will be able to be turned off by using
00f254e2	35	this pragma.
66060160	36
	37	Additionally, a new behavior may be supported in a new version of Perl, but
	38	for whatever reason the default remains the old one. This pragma can enable
	39	the new behavior.
	40
	41	Like other pragmas (C<use feature>, for example), C<use legacy qw(foo)> will
	42	only make the legacy behavior for "foo" available from that point to the end of
	43	the enclosing block.
	44
66060160	45	=head2 B<use legacy>
	46
	47	Preserve the old way of doing things when a new version of Perl is
00f254e2	48	released that would otherwise change the behavior.
	49
	50	The one current possibility is:
	51
	52	=head3 unicode8bit
	53
	54	THIS IS SUBJECT TO CHANGE
	55
	56	Use legacy semantics for the 128 characters on ASCII systems that have the 8th
	57	bit set. (See L</EBCDIC platforms> below for EBCDIC systems.) Unless
	58	C<S<use locale>> is specified, or the scalar containing such a character is
	59	known by Perl to be encoded in UTF8, the semantics are essentially that the
	60	characters have an ordinal number, and that's it. They are caseless, and
	61	aren't anything: they're not controls, not letters, not punctuation, ..., not
	62	anything.
	63
	64	This behavior stems from when Perl did not support Unicode, and ASCII was the
	65	only known character set outside of C<S<use locale>>. In order to not
	66	possibly break pre_Unicode programs, these characters have retained their old
	67	non-meanings, except when it is clear to Perl that Unicode is what is meant,
	68	for example by calling utf::upgrade() on a scalar, or if the scalar also
	69	contains characters that are only available in Unicode. Then these 128
	70	characters take on their Unicode meanings.
	71
	72	The problem with this behavior is that a scalar that encodes these characters
	73	has a different meaning depending on if it is stored as utf8 or not.
	74	In general, the internal storage method should not affect the
	75	external behavior.
	76
	77	The behavior is known to have effects on these areas:
	78
	79	=over 4
	80
	81	=item
	82
	83	Changing the case of a scalar, that is, using C<uc()>,
	84	C<ucfirst()>,
	85	C<lc()>,
	86	and C<lcfirst()>, or C<\L>, C<\U>, C<\u> and C<\l> in regular expression substitutions.
	87
	88	=item
	89
	90	Using caseless (C</i>) regular expression matching
	91
	92	=item
	93
	94	Matching a number of properties in regular expressions, such as C<\w>
	95
	96	=item
	97
	98	User-defined case change mappings. You can create a C<ToUpper()> function, for
	99	example, which overrides Perl's built-in case mappings. The scalar must be
	100	encoded in utf8 for your function to actually be invoked.
	101
	102	=back
	103
	104	B<This lack of semantics for these characters is currently the default,>
	105	outside of C<use locale>. See below for EBCDIC.
	106	To turn on B<case changing semantics only> for these characters, use
	107	C<S<no legacy>>.
	108	The other legacy behaviors regarding these characters are currently
	109	unaffected by this pragma.
	110
	111	=head4 EBCDIC platforms
112
113	On EBCDIC platforms, the situation is somewhat different. The legacy
114	semantics are whatever the underlying semantics of the native C language
115	library are. Each of the three EBCDIC encodings currently known by Perl is an
116	isomorph of the Latin-1 character set. That means every character in Latin-1
117	has a corresponding EBCDIC equivalent, and vice-versa. Specifying C<S<no
118	legacy>> currently makes sure that all EBCDIC characters have the same
119	B<casing only> semantics as their corresponding Latin-1 characters.
66060160	120
	121	=head2 B<no legacy>
	122
	123	Turn on a new behavior in a version of Perl that understands
	124	it but has it turned off by default. For example, C<no legacy 'foo'> turns on
00f254e2	125	behavior C<foo> in the lexical scope of the pragma. C<no legacy>
00f254e2	126	without any modifier turns on all new behaviors known to the pragma.
66060160	127
	128	=head1 LEGACY BUNDLES
	129
00f254e2	130	It's possible to turn off all new behaviors past a given release by
66060160	131	using a I<legacy bundle>, which is the name of the release prefixed with
	132	a colon, to distinguish it from an individual legacy behavior.
	133
	134	Specifying sub-versions such as the C<0> in C<5.10.0> in legacy bundles has
	135	no effect: legacy bundles are guaranteed to be the same for all sub-versions.
	136
	137	Legacy bundles are not allowed with C<no legacy>
	138
	139	=cut
	140
	141	sub import {
	142	my $class = shift;
	143	if (@_ == 0) {
	144	croak("No legacy behaviors specified");
	145	}
	146	while (@_) {
	147	my $name = shift(@_);
	148	if (substr($name, 0, 1) eq ":") {
	149	my $v = substr($name, 1);
	150	if (!exists $legacy_bundle{$v}) {
	151	$v =~ s/^([0-9]+)\.([0-9]+).[0-9]+$/$1.$2/;
	152	if (!exists $legacy_bundle{$v}) {
	153	unknown_legacy_bundle(substr($name, 1));
	154	}
	155	}
	156	unshift @_, @{$legacy_bundle{$v}};
	157	next;
	158	}
	159	if (!exists $legacy{$name}) {
	160	unknown_legacy($name);
	161	}
00f254e2	162	$^H &= ~$unicode8bit::hint_uni8bit; # The only valid thing as of yet
66060160	163	}
	164	}
	165
	166
	167	sub unimport {
	168	my $class = shift;
	169
	170	# A bare C<no legacy> should disable all legacy behaviors
	171	if (!@_) {
	172	unshift @_, keys(%legacy);
	173	}
	174
	175	while (@_) {
	176	my $name = shift;
	177	if (substr($name, 0, 1) eq ":") {
	178	croak(sprintf('Legacy bundles (%s) are not allowed in "no legacy"',
	179	$name));
	180	}
	181	if (!exists($legacy{$name})) {
	182	unknown_legacy($name);
	183	}
	184	else {
00f254e2	185	$^H \|= $unicode8bit::hint_uni8bit; # The only valid thing as of yet
66060160	186	}
	187	}
	188	}
	189
	190	sub unknown_legacy {
	191	my $legacy = shift;
	192	croak(sprintf('Legacy "%s" is not supported by Perl %vd', $legacy, $^V));
	193	}
	194
	195	sub unknown_legacy_bundle {
	196	my $legacy = shift;
	197	croak(sprintf('Legacy bundle "%s" is not supported by Perl %vd',
	198	$legacy, $^V));
	199	}
	200
	201	sub croak {
	202	require Carp;
	203	Carp::croak(@_);
	204	}
	205
	206	1;