X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FGitalist.git;a=blobdiff_plain;f=local-lib5%2Flib%2Fperl5%2FPPI%2FToken%2FBOM.pm;fp=local-lib5%2Flib%2Fperl5%2FPPI%2FToken%2FBOM.pm;h=0736000dc1e579831133bdc95b14d5c23216d149;hp=0000000000000000000000000000000000000000;hb=3fea05b9fbf95091f4522528b9980a33e0235603;hpb=af746827daa7a8feccee889e1d12ebc74cc9201e

diff --git a/local-lib5/lib/perl5/PPI/Token/BOM.pm b/local-lib5/lib/perl5/PPI/Token/BOM.pm
new file mode 100644
index 0000000..0736000
--- /dev/null
+++ b/local-lib5/lib/perl5/PPI/Token/BOM.pm
@@ -0,0 +1,115 @@
+package PPI::Token::BOM;
+
+=pod
+
+=head1 NAME
+
+PPI::Token::BOM - Tokens representing Unicode byte order marks
+
+=head1 INHERITANCE
+
+  PPI::Token::BOM
+  isa PPI::Token
+      isa PPI::Element
+
+=head1 DESCRIPTION
+
+This is a special token in that it can only occur at the beginning of
+documents.  If a BOM byte mark occurs elsewhere in a file, it should
+be treated as L<PPI::Token::Whitespace>.  We recognize the byte order
+marks identified at this URL:
+L<http://www.unicode.org/faq/utf_bom.html#BOM>
+
+    UTF-32, big-endian     00 00 FE FF
+    UTF-32, little-endian  FF FE 00 00
+    UTF-16, big-endian     FE FF
+    UTF-16, little-endian  FF FE
+    UTF-8                  EF BB BF
+
+Note that as of this writing, PPI only has support for UTF-8
+(namely, in POD and strings) and no support for UTF-16 or UTF-32.  We
+support the BOMs of the latter two for completeness only.
+
+The BOM is considered non-significant, like white space.
+
+=head1 METHODS
+
+There are no additional methods beyond those provided by the parent
+L<PPI::Token> and L<PPI::Element> classes.
+
+=cut
+
+use strict;
+use PPI::Token ();
+
+use vars qw{$VERSION @ISA};
+BEGIN {
+	$VERSION = '1.206';
+	@ISA     = 'PPI::Token';
+}
+
+sub significant { '' }
+
+
+
+
+
+#####################################################################
+# Parsing Methods
+
+my %bom_types = (
+   "\x00\x00\xfe\xff" => 'UTF-32',
+   "\xff\xfe\x00\x00" => 'UTF-32',
+   "\xfe\xff"         => 'UTF-16',
+   "\xff\xfe"         => 'UTF-16',
+   "\xef\xbb\xbf"     => 'UTF-8',
+);
+
+sub __TOKENIZER__on_line_start {
+	my $t = $_[1];
+	$_ = $t->{line};
+
+	if (m/^(\x00\x00\xfe\xff |  # UTF-32, big-endian
+		\xff\xfe\x00\x00 |  # UTF-32, little-endian
+		\xfe\xff         |  # UTF-16, big-endian
+		\xff\xfe         |  # UTF-16, little-endian
+		\xef\xbb\xbf)       # UTF-8
+	    /xs) {
+	   my $bom = $1;
+
+	   if ($bom_types{$bom} ne 'UTF-8') {
+	      return $t->_error("$bom_types{$bom} is not supported");
+	   }
+
+	   $t->_new_token('BOM', $bom) or return undef;
+	   $t->{line_cursor} += length $bom;
+	}
+
+	# Continue just as if there was no BOM
+	$t->{class} = 'PPI::Token::Whitespace';
+	return $t->{class}->__TOKENIZER__on_line_start($t);
+}
+
+1;
+
+=pod
+
+=head1 SUPPORT
+
+See the L<support section|PPI/SUPPORT> in the main module
+
+=head1 AUTHOR
+
+Chris Dolan E<lt>cdolan@cpan.orgE<gt>
+
+=head1 COPYRIGHT
+
+Copyright 2001 - 2009 Adam Kennedy.
+
+This program is free software; you can redistribute
+it and/or modify it under the same terms as Perl itself.
+
+The full text of the license can be found in the
+LICENSE file included with this module.
+
+=cut