X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FGitalist.git;a=blobdiff_plain;f=local-lib5%2Flib%2Fperl5%2FPPI%2FToken%2FBOM.pm;fp=local-lib5%2Flib%2Fperl5%2FPPI%2FToken%2FBOM.pm;h=0736000dc1e579831133bdc95b14d5c23216d149;hp=0000000000000000000000000000000000000000;hb=3fea05b9fbf95091f4522528b9980a33e0235603;hpb=af746827daa7a8feccee889e1d12ebc74cc9201e diff --git a/local-lib5/lib/perl5/PPI/Token/BOM.pm b/local-lib5/lib/perl5/PPI/Token/BOM.pm new file mode 100644 index 0000000..0736000 --- /dev/null +++ b/local-lib5/lib/perl5/PPI/Token/BOM.pm @@ -0,0 +1,115 @@ +package PPI::Token::BOM; + +=pod + +=head1 NAME + +PPI::Token::BOM - Tokens representing Unicode byte order marks + +=head1 INHERITANCE + + PPI::Token::BOM + isa PPI::Token + isa PPI::Element + +=head1 DESCRIPTION + +This is a special token in that it can only occur at the beginning of +documents. If a BOM byte mark occurs elsewhere in a file, it should +be treated as L. We recognize the byte order +marks identified at this URL: +L + + UTF-32, big-endian 00 00 FE FF + UTF-32, little-endian FF FE 00 00 + UTF-16, big-endian FE FF + UTF-16, little-endian FF FE + UTF-8 EF BB BF + +Note that as of this writing, PPI only has support for UTF-8 +(namely, in POD and strings) and no support for UTF-16 or UTF-32. We +support the BOMs of the latter two for completeness only. + +The BOM is considered non-significant, like white space. + +=head1 METHODS + +There are no additional methods beyond those provided by the parent +L and L classes. + +=cut + +use strict; +use PPI::Token (); + +use vars qw{$VERSION @ISA}; +BEGIN { + $VERSION = '1.206'; + @ISA = 'PPI::Token'; +} + +sub significant { '' } + + + + + +##################################################################### +# Parsing Methods + +my %bom_types = ( + "\x00\x00\xfe\xff" => 'UTF-32', + "\xff\xfe\x00\x00" => 'UTF-32', + "\xfe\xff" => 'UTF-16', + "\xff\xfe" => 'UTF-16', + "\xef\xbb\xbf" => 'UTF-8', +); + +sub __TOKENIZER__on_line_start { + my $t = $_[1]; + $_ = $t->{line}; + + if (m/^(\x00\x00\xfe\xff | # UTF-32, big-endian + \xff\xfe\x00\x00 | # UTF-32, little-endian + \xfe\xff | # UTF-16, big-endian + \xff\xfe | # UTF-16, little-endian + \xef\xbb\xbf) # UTF-8 + /xs) { + my $bom = $1; + + if ($bom_types{$bom} ne 'UTF-8') { + return $t->_error("$bom_types{$bom} is not supported"); + } + + $t->_new_token('BOM', $bom) or return undef; + $t->{line_cursor} += length $bom; + } + + # Continue just as if there was no BOM + $t->{class} = 'PPI::Token::Whitespace'; + return $t->{class}->__TOKENIZER__on_line_start($t); +} + +1; + +=pod + +=head1 SUPPORT + +See the L in the main module + +=head1 AUTHOR + +Chris Dolan Ecdolan@cpan.orgE + +=head1 COPYRIGHT + +Copyright 2001 - 2009 Adam Kennedy. + +This program is free software; you can redistribute +it and/or modify it under the same terms as Perl itself. + +The full text of the license can be found in the +LICENSE file included with this module. + +=cut