[p5sagit/p5-mst-13.2.git] / lib / bytes.pm

package bytes;

# Version of this pragma module.
our $VERSION = '1.01';

# Mask for the one bit of $^H that this pragma owns: import() sets it and
# unimport() clears it.  Kept as a fully qualified package variable so it is
# reachable from outside the package as $bytes::hint_bits.
# NOTE(review): presumably has to agree with the interpreter's own constant
# for the "bytes" hint -- confirm before changing the value.
$bytes::hint_bits = 1 << 3;    # i.e. 0x00000008

# Invoked by "use bytes".  Turns on the bytes hint bit in $^H (the
# compile-time hints of the scope currently being compiled) and leaves all
# other hint bits untouched.
sub import {
    $^H = $^H | $bytes::hint_bits;
}

# Invoked by "no bytes".  Clears the bytes hint bit in $^H (the compile-time
# hints of the scope currently being compiled) and leaves all other hint
# bits untouched.
sub unimport {
    $^H = $^H & ~$bytes::hint_bits;
}

# Lazy loader for the functions of this package.
#
# The subs declared in this file have prototypes but no bodies, so the first
# call to any of them arrives here.  AUTOLOAD loads "bytes_heavy.pl" (which
# is expected to supply the bodies) and then re-dispatches to the requested
# sub with the caller's @_ intact.
#
# Dies via Carp::croak, reported from the caller's point of view, when the
# requested name is still undefined after the load.
sub AUTOLOAD {
    our $AUTOLOAD;    # fully qualified name of the sub that was called

    require "bytes_heavy.pl";

    # "goto &sub" replaces this frame, so the target sees the original
    # caller.  It must be guarded: jumping to a sub that is still undefined
    # would invoke AUTOLOAD again and loop without end.
    goto &$AUTOLOAD if defined &$AUTOLOAD;

    require Carp;
    Carp::croak("Undefined subroutine $AUTOLOAD called");
}

# Forward declarations: each line gives a sub its prototype but no body.
# Calling one of them while it still has no body falls through to this
# package's AUTOLOAD.  Listed alphabetically, matching the SYNOPSIS.
sub chr    ($);
sub index  ($$;$);
sub length ($);
sub ord    ($);
sub rindex ($$;$);
sub substr ($$;$$);

1;
__END__

=head1 NAME

bytes - Perl pragma to force byte semantics rather than character semantics

=head1 SYNOPSIS

    use bytes;
    ... chr(...);       # or bytes::chr
    ... index(...);     # or bytes::index
    ... length(...);    # or bytes::length
    ... ord(...);       # or bytes::ord
    ... rindex(...);    # or bytes::rindex
    ... substr(...);    # or bytes::substr
    no bytes;


=head1 DESCRIPTION

The C<use bytes> pragma disables character semantics for the rest of the
lexical scope in which it appears.  C<no bytes> can be used to reverse
the effect of C<use bytes> within the current lexical scope.

Perl normally assumes character semantics in the presence of character
data (i.e. data that has come from a source that has been marked as
being of a particular character encoding). When C<use bytes> is in
effect, the encoding is temporarily ignored, and each string is treated
as a series of bytes. 

As an example, when Perl sees C<$x = chr(400)>, it encodes the character
in UTF-8 and stores it in $x. Then it is marked as character data, so,
for instance, C<length $x> returns C<1>. However, in the scope of the
C<bytes> pragma, $x is treated as a series of bytes - the bytes that make
up the UTF-8 encoding - and C<length $x> returns C<2>:

    $x = chr(400);
    print "Length is ", length $x, "\n";     # "Length is 1"
    printf "Contents are %vd\n", $x;         # "Contents are 400"
    { 
        use bytes; # or "require bytes; bytes::length()"
        print "Length is ", length $x, "\n"; # "Length is 2"
        printf "Contents are %vd\n", $x;     # "Contents are 198.144"
    }

chr(), ord(), substr(), index() and rindex() behave similarly.

For more on the implications and differences between character
semantics and byte semantics, see L<perluniintro> and L<perlunicode>.

=head1 LIMITATIONS

bytes::substr() does not work as an lvalue (it cannot be assigned to).

=head1 SEE ALSO

L<perluniintro>, L<perlunicode>, L<utf8>

=cut
Commit	Line	Data
657b208b	1	package bytes;
5bc28da9	2
65016084	3	our $VERSION = '1.01';
b75c8c73	4
d5448623	5	$bytes::hint_bits = 0x00000008;
d5448623	6
5bc28da9	7	sub import {
d5448623	8	$^H \|= $bytes::hint_bits;
5bc28da9	9	}
	10
	11	sub unimport {
d5448623	12	$^H &= ~$bytes::hint_bits;
5bc28da9	13	}
	14
	15	sub AUTOLOAD {
657b208b	16	require "bytes_heavy.pl";
5bc28da9	17	goto &$AUTOLOAD;
	18	}
	19
	20	sub length ($);
579f6b36	21	sub chr ($);
	22	sub ord ($);
	23	sub substr ($$;$$);
	24	sub index ($$;$);
	25	sub rindex ($$;$);
5bc28da9	26
	27	1;
	28	__END__
	29
	30	=head1 NAME
	31
657b208b	32	bytes - Perl pragma to force byte semantics rather than character semantics
5bc28da9	33
	34	=head1 SYNOPSIS
	35
657b208b	36	use bytes;
579f6b36	37	... chr(...); # or bytes::chr
	38	... index(...); # or bytes::index
	39	... length(...); # or bytes::length
	40	... ord(...); # or bytes::ord
	41	... rindex(...); # or bytes::rindex
	42	... substr(...); # or bytes::substr
657b208b	43	no bytes;
5bc28da9	44
579f6b36	45
5bc28da9	46	=head1 DESCRIPTION
5bc28da9	47
657b208b	48	The C<use bytes> pragma disables character semantics for the rest of the
	49	lexical scope in which it appears. C<no bytes> can be used to reverse
	50	the effect of C<use bytes> within the current lexical scope.
393fec97	51
5de28535	52	Perl normally assumes character semantics in the presence of character
	53	data (i.e. data that has come from a source that has been marked as
	54	being of a particular character encoding). When C<use bytes> is in
	55	effect, the encoding is temporarily ignored, and each string is treated
	56	as a series of bytes.
	57
	58	As an example, when Perl sees C<$x = chr(400)>, it encodes the character
c26c758b	59	in UTF-8 and stores it in $x. Then it is marked as character data, so,
5de28535	60	for instance, C<length $x> returns C<1>. However, in the scope of the
	61	C<bytes> pragma, $x is treated as a series of bytes - the bytes that make
	62	up the UTF8 encoding - and C<length $x> returns C<2>:
	63
	64	$x = chr(400);
	65	print "Length is ", length $x, "\n"; # "Length is 1"
	66	printf "Contents are %vd\n", $x; # "Contents are 400"
	67	{
579f6b36	68	use bytes; # or "require bytes; bytes::length()"
5de28535	69	print "Length is ", length $x, "\n"; # "Length is 2"
	70	printf "Contents are %vd\n", $x; # "Contents are 198.144"
	71	}
	72
579f6b36	73	chr(), ord(), substr(), index() and rindex() behave similarly.
579f6b36	74
5de28535	75	For more on the implications and differences between character
579f6b36	76	semantics and byte semantics, see L<perluniintro> and L<perlunicode>.
	77
	78	=head1 LIMITATIONS
	79
	80	bytes::substr() does not work as an lvalue().
393fec97	81
	82	=head1 SEE ALSO
	83
579f6b36	84	L<perluniintro>, L<perlunicode>, L<utf8>
5bc28da9	85
5bc28da9	86	=cut