doc f7abe7
[p5sagit/p5-mst-13.2.git] / lib / bytes.pm
CommitLineData
657b208b 1package bytes;
5bc28da9 2
a515200d 3our $VERSION = '1.04';
b75c8c73 4
d5448623 5$bytes::hint_bits = 0x00000008;
6
5bc28da9 7sub import {
d5448623 8 $^H |= $bytes::hint_bits;
5bc28da9 9}
10
11sub unimport {
d5448623 12 $^H &= ~$bytes::hint_bits;
5bc28da9 13}
14
15sub AUTOLOAD {
657b208b 16 require "bytes_heavy.pl";
5b5a256a 17 goto &$AUTOLOAD if defined &$AUTOLOAD;
18 require Carp;
19 Carp::croak("Undefined subroutine $AUTOLOAD called");
5bc28da9 20}
21
79077e6c 22sub length (_);
23sub chr (_);
24sub ord (_);
579f6b36 25sub substr ($$;$$);
26sub index ($$;$);
27sub rindex ($$;$);
5bc28da9 28
291;
30__END__
31
32=head1 NAME
33
657b208b 34bytes - Perl pragma to force byte semantics rather than character semantics
5bc28da9 35
490aa361 36=head1 NOTICE
a515200d 37
38This pragma reflects early attempts to incorporate Unicode into perl and
677a2954 39has since been superseded. It breaks encapsulation (i.e. it exposes the
40innards of how the perl executable currently happens to store a string),
41and use of this module for anything other than debugging purposes is
42strongly discouraged. If you feel that the functions herein might be
a42901af 43useful for your application, this possibly indicates a mismatch between
677a2954 44your mental model of Perl Unicode and the current reality. In that case,
04c2c53e 45you may wish to read some of the perl Unicode documentation:
677a2954 46L<perluniintro>, L<perlunitut>, L<perlunifaq> and L<perlunicode>.
a515200d 47
5bc28da9 48=head1 SYNOPSIS
49
657b208b 50 use bytes;
579f6b36 51 ... chr(...); # or bytes::chr
52 ... index(...); # or bytes::index
53 ... length(...); # or bytes::length
54 ... ord(...); # or bytes::ord
55 ... rindex(...); # or bytes::rindex
56 ... substr(...); # or bytes::substr
657b208b 57 no bytes;
5bc28da9 58
579f6b36 59
5bc28da9 60=head1 DESCRIPTION
61
657b208b 62The C<use bytes> pragma disables character semantics for the rest of the
63lexical scope in which it appears. C<no bytes> can be used to reverse
64the effect of C<use bytes> within the current lexical scope.
393fec97 65
5de28535 66Perl normally assumes character semantics in the presence of character
67data (i.e. data that has come from a source that has been marked as
68being of a particular character encoding). When C<use bytes> is in
69effect, the encoding is temporarily ignored, and each string is treated
70as a series of bytes.
71
72As an example, when Perl sees C<$x = chr(400)>, it encodes the character
c26c758b 73in UTF-8 and stores it in C<$x>. The string is then marked as character data, so,
5de28535 74for instance, C<length $x> returns C<1>. However, in the scope of the
75C<bytes> pragma, C<$x> is treated as a series of bytes - the bytes that make
76up the UTF-8 encoding - and C<length $x> returns C<2>:
77
78 $x = chr(400);
79 print "Length is ", length $x, "\n"; # "Length is 1"
80 printf "Contents are %vd\n", $x; # "Contents are 400"
81 {
579f6b36 82 use bytes; # or "require bytes; bytes::length()"
5de28535 83 print "Length is ", length $x, "\n"; # "Length is 2"
84 printf "Contents are %vd\n", $x; # "Contents are 198.144"
85 }
86
579f6b36 87chr(), ord(), substr(), index() and rindex() behave similarly.
88
5de28535 89For more on the implications and differences between character
579f6b36 90semantics and byte semantics, see L<perluniintro> and L<perlunicode>.
91
92=head1 LIMITATIONS
93
94bytes::substr() does not work as an lvalue (it cannot be assigned to).
393fec97 95
96=head1 SEE ALSO
97
579f6b36 98L<perluniintro>, L<perlunicode>, L<utf8>
5bc28da9 99
100=cut