--- /dev/null
+.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.3
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sh \" Subsection heading
+.br
+.if t .Sp
+.ne 5
+.PP
+\fB\\$1\fR
+.PP
+..
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. | will give a
+.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
+.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
+.\" expand to `' in nroff, nothing in troff, for use with C<>.
+.tr \(*W-|\(bv\*(Tr
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+'br\}
+.\"
+.\" If the F register is turned on, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.if \nF \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. nr % 0
+. rr F
+.\}
+.\"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.hy 0
+.if n .na
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "HTML::TokeParser 3"
+.TH HTML::TokeParser 3 "2008-04-04" "perl v5.8.7" "User Contributed Perl Documentation"
+.SH "NAME"
+HTML::TokeParser \- Alternative HTML::Parser interface
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 4
+\& require HTML::TokeParser;
+\& $p = HTML::TokeParser\->new("index.html") ||
+\& die "Can't open: $!";
+\& $p\->empty_element_tags(1); # configure its behaviour
+.Ve
+.PP
+.Vb 3
+\& while (my $token = $p\->get_token) {
+\& #...
+\& }
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+The \f(CW\*(C`HTML::TokeParser\*(C'\fR is an alternative interface to the
+\&\f(CW\*(C`HTML::Parser\*(C'\fR class. It is an \f(CW\*(C`HTML::PullParser\*(C'\fR subclass with a
+predeclared set of token types. If you wish the tokens to be reported
+differently you probably want to use the \f(CW\*(C`HTML::PullParser\*(C'\fR directly.
+.PP
+The following methods are available:
+.ie n .IP "$p = HTML::TokeParser\->new( $filename, %opt );" 4
+.el .IP "$p = HTML::TokeParser\->new( \f(CW$filename\fR, \f(CW%opt\fR );" 4
+.IX Item "$p = HTML::TokeParser->new( $filename, %opt );"
+.PD 0
+.ie n .IP "$p = HTML::TokeParser\->new( $filehandle, %opt );" 4
+.el .IP "$p = HTML::TokeParser\->new( \f(CW$filehandle\fR, \f(CW%opt\fR );" 4
+.IX Item "$p = HTML::TokeParser->new( $filehandle, %opt );"
+.ie n .IP "$p = HTML::TokeParser\->new( \e$document, %opt );" 4
+.el .IP "$p = HTML::TokeParser\->new( \e$document, \f(CW%opt\fR );" 4
+.IX Item "$p = HTML::TokeParser->new( $document, %opt );"
+.PD
+The object constructor argument is either a file name, a file handle
+object, or the complete document to be parsed. Extra options can be
+provided as key/value pairs and are processed as documented by the base
+classes.
+.Sp
+If the argument is a plain scalar, then it is taken as the name of a
+file to be opened and parsed. If the file can't be opened for
+reading, then the constructor will return \f(CW\*(C`undef\*(C'\fR and $! will tell
+you why it failed.
+.Sp
+If the argument is a reference to a plain scalar, then this scalar is
+taken to be the literal document to parse. The value of this
+scalar should not be changed before all tokens have been extracted.
+.Sp
+Otherwise the argument is taken to be some object that the
+\&\f(CW\*(C`HTML::TokeParser\*(C'\fR can \fIread()\fR from when it needs more data. Typically
+it will be a filehandle of some kind. The stream will be \fIread()\fR until
+\&\s-1EOF\s0, but not closed.
+.Sp
+A newly constructed \f(CW\*(C`HTML::TokeParser\*(C'\fR differs from its base classes
+by having the \f(CW\*(C`unbroken_text\*(C'\fR attribute enabled by default. See
+HTML::Parser for a description of this and other attributes that
+influence how the document is parsed. It is often a good idea to enable
+\&\f(CW\*(C`empty_element_tags\*(C'\fR behaviour.
+.Sp
+Note that the parsing result will likely not be valid if raw undecoded
+\&\s-1UTF\-8\s0 is used as a source. When parsing \s-1UTF\-8\s0 encoded files, turn
+on \s-1UTF\-8\s0 decoding:
+.Sp
+.Vb 3
+\& open(my $fh, "<:utf8", "index.html") || die "Can't open 'index.html': $!";
+\& my $p = HTML::TokeParser\->new( $fh );
+\& # ...
+.Ve
+.Sp
+If a \f(CW$filename\fR is passed to the constructor the file will be opened in
+raw mode and the parsing result will only be valid if its content is
+Latin\-1 or pure \s-1ASCII\s0.
+.Sp
+If parsing from a \s-1UTF\-8\s0 encoded string buffer, decode it first:
+.Sp
+.Vb 3
+\& utf8::decode($document);
+\& my $p = HTML::TokeParser\->new( \e$document );
+\& # ...
+.Ve
+.IP "$p\->get_token" 4
+.IX Item "$p->get_token"
+This method will return the next \fItoken\fR found in the \s-1HTML\s0 document,
+or \f(CW\*(C`undef\*(C'\fR at the end of the document. The token is returned as an
+array reference. The first element of the array will be a string
+denoting the type of this token: \*(L"S\*(R" for start tag, \*(L"E\*(R" for end tag,
+\&\*(L"T\*(R" for text, \*(L"C\*(R" for comment, \*(L"D\*(R" for declaration, and \*(L"\s-1PI\s0\*(R" for
+processing instructions. The rest of the token array depends on the type
+like this:
+.Sp
+.Vb 6
+\& ["S", $tag, $attr, $attrseq, $text]
+\& ["E", $tag, $text]
+\& ["T", $text, $is_data]
+\& ["C", $text]
+\& ["D", $text]
+\& ["PI", $token0, $text]
+.Ve
+.Sp
+where \f(CW$attr\fR is a hash reference, \f(CW$attrseq\fR is an array reference and
+the rest are plain scalars. The \*(L"Argspec\*(R" in HTML::Parser explains the
+details.
+.ie n .IP "$p\->unget_token( @tokens )" 4
+.el .IP "$p\->unget_token( \f(CW@tokens\fR )" 4
+.IX Item "$p->unget_token( @tokens )"
+If you find you have read too many tokens you can push them back,
+so that they are returned the next time \f(CW$p\fR\->get_token is called.
+.IP "$p\->get_tag" 4
+.IX Item "$p->get_tag"
+.PD 0
+.ie n .IP "$p\->get_tag( @tags )" 4
+.el .IP "$p\->get_tag( \f(CW@tags\fR )" 4
+.IX Item "$p->get_tag( @tags )"
+.PD
+This method returns the next start or end tag (skipping any other
+tokens), or \f(CW\*(C`undef\*(C'\fR if there are no more tags in the document. If
+one or more arguments are given, then we skip tokens until one of the
+specified tag types is found. For example:
+.Sp
+.Vb 1
+\& $p\->get_tag("font", "/font");
+.Ve
+.Sp
+will find the next start or end tag for a font\-element.
+.Sp
+The tag information is returned as an array reference in the same form
+as for \f(CW$p\fR\->get_token above, but the type code (first element) is
+missing. A start tag will be returned like this:
+.Sp
+.Vb 1
+\& [$tag, $attr, $attrseq, $text]
+.Ve
+.Sp
+The tag names of end tags are prefixed with \*(L"/\*(R", i.e. an end tag is
+returned like this:
+.Sp
+.Vb 1
+\& ["/$tag", $text]
+.Ve
+.IP "$p\->get_text" 4
+.IX Item "$p->get_text"
+.PD 0
+.ie n .IP "$p\->get_text( @endtags )" 4
+.el .IP "$p\->get_text( \f(CW@endtags\fR )" 4
+.IX Item "$p->get_text( @endtags )"
+.PD
+This method returns all text found at the current position. It will
+return a zero length string if the next token is not text. Any
+entities will be converted to their corresponding character.
+.Sp
+If one or more arguments are given, then we return all text occurring
+before the first of the specified tags found. For example:
+.Sp
+.Vb 1
+\& $p\->get_text("p", "br");
+.Ve
+.Sp
+will return the text up to either a paragraph or linebreak element.
+.Sp
+The text might span tags that should be \fItextified\fR. This is
+controlled by the \f(CW$p\fR\->{textify} attribute, which is a hash that
+defines how certain tags can be treated as text. If the name of a
+start tag matches a key in this hash then this tag is converted to
+text. The hash value is used to specify which tag attribute to obtain
+the text from. If this tag attribute is missing, then the upper case
+name of the tag enclosed in brackets is returned, e.g. \*(L"[\s-1IMG\s0]\*(R". The
+hash value can also be a subroutine reference. In this case the
+routine is called with the start tag token content as its argument and
+the return value is treated as the text.
+.Sp
+The default \f(CW$p\fR\->{textify} value is:
+.Sp
+.Vb 1
+\& {img => "alt", applet => "alt"}
+.Ve
+.Sp
+This means that <\s-1IMG\s0> and <\s-1APPLET\s0> tags are treated as text, and that
+the text to substitute can be found in the \s-1ALT\s0 attribute.
+.IP "$p\->get_trimmed_text" 4
+.IX Item "$p->get_trimmed_text"
+.PD 0
+.ie n .IP "$p\->get_trimmed_text( @endtags )" 4
+.el .IP "$p\->get_trimmed_text( \f(CW@endtags\fR )" 4
+.IX Item "$p->get_trimmed_text( @endtags )"
+.PD
+Same as \f(CW$p\fR\->get_text above, but will collapse any sequences of white
+space to a single space character. Leading and trailing white space is
+removed.
+.IP "$p\->get_phrase" 4
+.IX Item "$p->get_phrase"
+This will return all text found at the current position ignoring any
+phrasal-level tags. Text is extracted until the first non
+phrasal-level tag. Textification of tags is the same as for
+\&\fIget_text()\fR. This method will collapse white space in the same way as
+\&\fIget_trimmed_text()\fR does.
+.Sp
+The definition of \fIphrasal-level tags\fR is obtained from the
+HTML::Tagset module.
+.SH "EXAMPLES"
+.IX Header "EXAMPLES"
+This example extracts all links from a document. It will print one
+line for each link, containing the \s-1URL\s0 and the textual description
+between the <A>...</A> tags:
+.PP
+.Vb 2
+\& use HTML::TokeParser;
+\& $p = HTML::TokeParser\->new(shift||"index.html");
+.Ve
+.PP
+.Vb 5
+\& while (my $token = $p\->get_tag("a")) {
+\& my $url = $token\->[1]{href} || "\-";
+\& my $text = $p\->get_trimmed_text("/a");
+\& print "$url\et$text\en";
+\& }
+.Ve
+.PP
+This example extracts the <\s-1TITLE\s0> from the document:
+.PP
+.Vb 6
+\& use HTML::TokeParser;
+\& $p = HTML::TokeParser\->new(shift||"index.html");
+\& if ($p\->get_tag("title")) {
+\& my $title = $p\->get_trimmed_text;
+\& print "Title: $title\en";
+\& }
+.Ve
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+HTML::PullParser, HTML::Parser
+.SH "COPYRIGHT"
+.IX Header "COPYRIGHT"
+Copyright 1998\-2005 Gisle Aas.
+.PP
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.