fix prose from previous lib/bytes.pm doc update
[p5sagit/p5-mst-13.2.git] / lib / bytes.pm
CommitLineData
# Package preamble for the bytes pragma: declares the package, its version,
# and the hint-bit mask used by import/unimport.
657b208b 1package bytes;
5bc28da9 2
a515200d 3our $VERSION = '1.04';
b75c8c73 4
# Bit mask that import() sets and unimport() clears in $^H.
d5448623 5$bytes::hint_bits = 0x00000008;
6
# Called for "use bytes": turns the pragma on by OR-ing $bytes::hint_bits
# into the compile-time hints variable $^H.
5bc28da9 7sub import {
d5448623 8 $^H |= $bytes::hint_bits;
5bc28da9 9}
10
# Called for "no bytes": turns the pragma off by clearing $bytes::hint_bits
# from the compile-time hints variable $^H (other bits are left untouched).
11sub unimport {
d5448623 12 $^H &= ~$bytes::hint_bits;
5bc28da9 13}
14
# Fallback for calls to subroutines in this package that have no body yet.
# Loads bytes_heavy.pl, then re-dispatches to the requested subroutine if it
# is now defined; otherwise croaks with an "Undefined subroutine" message.
# NOTE(review): bytes_heavy.pl presumably supplies the bodies of the
# subroutines that are only forward-declared in this file -- it is not
# visible here, so confirm against that file.
15sub AUTOLOAD {
657b208b 16 require "bytes_heavy.pl";
# $AUTOLOAD holds the fully qualified name of the subroutine that was called;
# goto &sub replaces this frame, so the target receives the current @_.
5b5a256a 17 goto &$AUTOLOAD if defined &$AUTOLOAD;
# Carp is loaded only on the failure path.
18 require Carp;
19 Carp::croak("Undefined subroutine $AUTOLOAD called");
5bc28da9 20}
21
# Forward declarations (no bodies) that fix the prototypes of the bytes::
# functions at compile time. Because no body is defined in this file, a call
# to any of them falls through to AUTOLOAD above.
# Prototype "_" takes one scalar argument, defaulting to $_ when omitted.
79077e6c 22sub length (_);
23sub chr (_);
24sub ord (_);
# "$$;$$" takes two required scalars plus up to two optional ones;
# "$$;$" takes two required scalars plus one optional one.
579f6b36 25sub substr ($$;$$);
26sub index ($$;$);
27sub rindex ($$;$);
5bc28da9 28
# True value so that loading this file via use/require succeeds.
291;
30__END__
31
32=head1 NAME
33
657b208b 34bytes - Perl pragma to force byte semantics rather than character semantics
5bc28da9 35
a515200d 36=head1 DEPRECATED
37
38This pragma reflects early attempts to incorporate Unicode into perl and
39has since been superseded. It breaks encapsulation (i.e. it exposes the
40innards of how perl happens to be currently storing a string), and use of
41this module for anything other than debugging purposes is strongly
42discouraged. If you feel that the functions here within might be useful
43for your application, this possibly indicates a mismatch between your
44mental model of Perl Unicode and the current reality. In that case, you
c9ddd1e9 45may wish to peruse some of the perl Unicode documentation: L<perluniintro>,
a515200d 46L<perlunitut>, L<perlunifaq> and L<perlunicode>.
47
5bc28da9 48=head1 SYNOPSIS
49
657b208b 50 use bytes;
579f6b36 51 ... chr(...); # or bytes::chr
52 ... index(...); # or bytes::index
53 ... length(...); # or bytes::length
54 ... ord(...); # or bytes::ord
55 ... rindex(...); # or bytes::rindex
56 ... substr(...); # or bytes::substr
657b208b 57 no bytes;
5bc28da9 58
579f6b36 59
5bc28da9 60=head1 DESCRIPTION
61
657b208b 62The C<use bytes> pragma disables character semantics for the rest of the
63lexical scope in which it appears. C<no bytes> can be used to reverse
64the effect of C<use bytes> within the current lexical scope.
393fec97 65
5de28535 66Perl normally assumes character semantics in the presence of character
67data (i.e. data that has come from a source that has been marked as
68being of a particular character encoding). When C<use bytes> is in
69effect, the encoding is temporarily ignored, and each string is treated
70as a series of bytes.
71
72As an example, when Perl sees C<$x = chr(400)>, it encodes the character
c26c758b 73in UTF-8 and stores it in $x. Then it is marked as character data, so,
5de28535 74for instance, C<length $x> returns C<1>. However, in the scope of the
75C<bytes> pragma, $x is treated as a series of bytes - the bytes that make
76up the UTF-8 encoding - and C<length $x> returns C<2>:
77
78 $x = chr(400);
79 print "Length is ", length $x, "\n"; # "Length is 1"
80 printf "Contents are %vd\n", $x; # "Contents are 400"
81 {
579f6b36 82 use bytes; # or "require bytes; bytes::length()"
5de28535 83 print "Length is ", length $x, "\n"; # "Length is 2"
84 printf "Contents are %vd\n", $x; # "Contents are 198.144"
85 }
86
579f6b36 87chr(), ord(), substr(), index() and rindex() behave similarly.
88
5de28535 89For more on the implications and differences between character
579f6b36 90semantics and byte semantics, see L<perluniintro> and L<perlunicode>.
91
92=head1 LIMITATIONS
93
94bytes::substr() does not work as an lvalue.
393fec97 95
96=head1 SEE ALSO
97
579f6b36 98L<perluniintro>, L<perlunicode>, L<utf8>
5bc28da9 99
100=cut