[p5sagit/p5-mst-13.2.git] / lib / bytes.pm

package bytes;

# Version of this module.
our $VERSION = '1.03';

# Bit mask owned by this pragma inside $^H (Perl's compile-time hints word).
# import() ORs this mask into $^H and unimport() clears it again.
$bytes::hint_bits = 0x00000008;

# Invoked by "use bytes": switches the pragma on by setting its bit in the
# compile-time hints ($^H).  Every other hint bit is left as it was.
sub import {
    $^H = $^H | $bytes::hint_bits;
}

# Invoked by "no bytes": switches the pragma off by masking its bit out of
# the compile-time hints ($^H).  Every other hint bit is left as it was.
sub unimport {
    $^H = $^H & ~$bytes::hint_bits;
}

# Fallback for calls to subs in this package that have no body yet.
#
# Loads bytes_heavy.pl, then re-dispatches to the sub that was originally
# requested (its fully qualified name is in $AUTOLOAD).  If the sub is still
# undefined after the load, croaks with an "Undefined subroutine" message.
sub AUTOLOAD {
    our $AUTOLOAD;

    # require loads a given file only once, so repeated calls are cheap.
    require "bytes_heavy.pl";

    if (defined &$AUTOLOAD) {
        # goto &sub replaces this frame, so the target sees the caller's @_
        # and AUTOLOAD does not show up in its call stack.
        goto &$AUTOLOAD;
    }

    # Carp is loaded only on the error path.
    require Carp;
    Carp::croak("Undefined subroutine $AUTOLOAD called");
}

# Forward declarations (prototype only, no body) for the bytes:: counterparts
# of the built-ins listed in the SYNOPSIS.  Nothing in this file defines their
# bodies: a call to one that is still undefined goes to AUTOLOAD above, which
# loads bytes_heavy.pl -- presumably where the implementations live (TODO
# confirm).  Declaring them here makes the prototypes known to the compiler
# before any body has been loaded.
#
# Prototype notes: "_" accepts one scalar and falls back to $_ when the
# argument is omitted; in "$$;$" and "$$;$$" the scalars before the ";" are
# mandatory and the ones after it are optional.
sub length (_);
sub chr (_);
sub ord (_);
sub substr ($$;$$);
sub index ($$;$);
sub rindex ($$;$);

# A module file has to finish by returning a true value.
1;
__END__

=head1 NAME

bytes - Perl pragma to force byte semantics rather than character semantics

=head1 SYNOPSIS

    use bytes;
    ... chr(...);       # or bytes::chr
    ... index(...);     # or bytes::index
    ... length(...);    # or bytes::length
    ... ord(...);       # or bytes::ord
    ... rindex(...);    # or bytes::rindex
    ... substr(...);    # or bytes::substr
    no bytes;


=head1 DESCRIPTION

The C<use bytes> pragma disables character semantics for the rest of the
lexical scope in which it appears.  C<no bytes> can be used to reverse
the effect of C<use bytes> within the current lexical scope.

Perl normally assumes character semantics in the presence of character
data (i.e. data that has come from a source that has been marked as
being of a particular character encoding). When C<use bytes> is in
effect, the encoding is temporarily ignored, and each string is treated
as a series of bytes. 

As an example, when Perl sees C<$x = chr(400)>, it encodes the character
in UTF-8 and stores it in C<$x>. Then it is marked as character data, so,
for instance, C<length $x> returns C<1>. However, in the scope of the
C<bytes> pragma, C<$x> is treated as a series of bytes - the bytes that make
up the UTF-8 encoding - and C<length $x> returns C<2>:

    $x = chr(400);
    print "Length is ", length $x, "\n";     # "Length is 1"
    printf "Contents are %vd\n", $x;         # "Contents are 400"
    { 
        use bytes; # or "require bytes; bytes::length()"
        print "Length is ", length $x, "\n"; # "Length is 2"
        printf "Contents are %vd\n", $x;     # "Contents are 198.144"
    }

chr(), ord(), substr(), index() and rindex() behave similarly.

For more on the implications and differences between character
semantics and byte semantics, see L<perluniintro> and L<perlunicode>.

=head1 LIMITATIONS

bytes::substr() does not work as an lvalue.

=head1 SEE ALSO

L<perluniintro>, L<perlunicode>, L<utf8>

=cut
Commit	Line	Data
657b208b	1	package bytes;
5bc28da9	2
79077e6c	3	our $VERSION = '1.03';
b75c8c73	4
d5448623	5	$bytes::hint_bits = 0x00000008;
d5448623	6
5bc28da9	7	sub import {
d5448623	8	$^H \|= $bytes::hint_bits;
5bc28da9	9	}
	10
	11	sub unimport {
d5448623	12	$^H &= ~$bytes::hint_bits;
5bc28da9	13	}
	14
	15	sub AUTOLOAD {
657b208b	16	require "bytes_heavy.pl";
5b5a256a	17	goto &$AUTOLOAD if defined &$AUTOLOAD;
	18	require Carp;
	19	Carp::croak("Undefined subroutine $AUTOLOAD called");
5bc28da9	20	}
5bc28da9	21
79077e6c	22	sub length (_);
	23	sub chr (_);
	24	sub ord (_);
579f6b36	25	sub substr ($$;$$);
	26	sub index ($$;$);
	27	sub rindex ($$;$);
5bc28da9	28
	29	1;
	30	__END__
	31
	32	=head1 NAME
	33
657b208b	34	bytes - Perl pragma to force byte semantics rather than character semantics
5bc28da9	35
	36	=head1 SYNOPSIS
	37
657b208b	38	use bytes;
579f6b36	39	... chr(...); # or bytes::chr
	40	... index(...); # or bytes::index
	41	... length(...); # or bytes::length
	42	... ord(...); # or bytes::ord
	43	... rindex(...); # or bytes::rindex
	44	... substr(...); # or bytes::substr
657b208b	45	no bytes;
5bc28da9	46
579f6b36	47
5bc28da9	48	=head1 DESCRIPTION
5bc28da9	49
657b208b	50	The C<use bytes> pragma disables character semantics for the rest of the
	51	lexical scope in which it appears. C<no bytes> can be used to reverse
	52	the effect of C<use bytes> within the current lexical scope.
393fec97	53
5de28535	54	Perl normally assumes character semantics in the presence of character
	55	data (i.e. data that has come from a source that has been marked as
	56	being of a particular character encoding). When C<use bytes> is in
	57	effect, the encoding is temporarily ignored, and each string is treated
	58	as a series of bytes.
	59
	60	As an example, when Perl sees C<$x = chr(400)>, it encodes the character
c26c758b	61	in UTF-8 and stores it in $x. Then it is marked as character data, so,
5de28535	62	for instance, C<length $x> returns C<1>. However, in the scope of the
	63	C<bytes> pragma, $x is treated as a series of bytes - the bytes that make
	64	up the UTF8 encoding - and C<length $x> returns C<2>:
	65
	66	$x = chr(400);
	67	print "Length is ", length $x, "\n"; # "Length is 1"
	68	printf "Contents are %vd\n", $x; # "Contents are 400"
	69	{
579f6b36	70	use bytes; # or "require bytes; bytes::length()"
5de28535	71	print "Length is ", length $x, "\n"; # "Length is 2"
	72	printf "Contents are %vd\n", $x; # "Contents are 198.144"
	73	}
	74
579f6b36	75	chr(), ord(), substr(), index() and rindex() behave similarly.
579f6b36	76
5de28535	77	For more on the implications and differences between character
579f6b36	78	semantics and byte semantics, see L<perluniintro> and L<perlunicode>.
	79
	80	=head1 LIMITATIONS
	81
	82	bytes::substr() does not work as an lvalue().
393fec97	83
	84	=head1 SEE ALSO
	85
579f6b36	86	L<perluniintro>, L<perlunicode>, L<utf8>
5bc28da9	87
5bc28da9	88	=cut