1 #line 1 "inc/HTML/TokeParser.pm - /System/Library/Perl/Extras/5.8.6/darwin-thread-multi-2level/HTML/TokeParser.pm"
# Package preamble for HTML::TokeParser.
2 package HTML::TokeParser;
4 # $Id: TokeParser.pm,v 2.28 2003/10/14 10:11:05 gisle Exp $
# The class inherits from HTML::PullParser, which is loaded at run time.
6 require HTML::PullParser;
7 @ISA=qw(HTML::PullParser);
# Build $VERSION from the RCS Revision keyword: the regex captures the
# "major.minor" digits and sprintf renders the minor part zero-padded to
# two digits.
8 $VERSION = sprintf("%d.%02d", q$Revision: 2.28 $ =~ /(\d+)\.(\d+)/);
# Only decode_entities is imported; it is used on text tokens below.
12 use HTML::Entities qw(decode_entities);
# Event name => argspec pairs. Each argspec begins with a quoted literal
# type code ('S', 'E', 'T', 'PI', 'C', 'D') followed by the fields to
# report for that event, so the type code is element 0 of every token.
# NOTE(review): the enclosing declaration is not visible in this excerpt;
# presumably these are the members of the %ARGS hash that is passed to
# SUPER::new -- confirm against the full file.
# Start tag: element 1 is the tag name, element 2 the attribute data.
17 start => "'S',tagname,attr,attrseq,text",
18 end => "'E',tagname,text",
# Text: element 1 is the text, element 2 the is_cdata flag.
19 text => "'T',text,is_cdata",
20 process => "'PI',token0,text",
21 comment => "'C',text",
22 declaration => "'D',text",
# Constructor fragment.
# NOTE(review): the sub header, the declaration of %cnf and the condition
# guarding the next two statements are not visible in this excerpt;
# presumably they handle the single-argument call form -- confirm.
# A reference to a scalar is passed under the "doc" key; anything else is
# passed under the "file" key.
31 my $type = (ref($_[0]) eq "SCALAR") ? "doc" : "file";
32 %cnf = ($type => $_[0]);
# Remove the "textify" option from the configuration before it reaches the
# parent constructor. A missing or false value is replaced by the default
# mapping, in which both img and applet use their "alt" attribute.
38 my $textify = delete $cnf{textify} || {img => "alt", applet => "alt"};
# Build the object with the remaining options plus %ARGS; a false result
# from the parent constructor makes this return undef.
40 my $self = $class->SUPER::new(%cnf, %ARGS) || return undef;
# Keep the mapping on the object; it is read back via $self->{textify}.
42 $self->{textify} = $textify;
# Tag-fetching fragment: returns the next start or end tag token.
# NOTE(review): the sub header, the declaration of $token and the loop
# headers that "next" and "$_" depend on are not visible in this excerpt.
# Returns undef as soon as get_token yields a false value.
52 $token = $self->get_token || return undef;
# Strip the type code, so that element 0 of the token is now the tag name.
53 my $type = shift @$token;
# Everything except start ("S") and end ("E") tokens is skipped.
54 next unless $type eq "S" || $type eq "E";
# End tags get a "/" inserted in front of the tag name (zero-length lvalue
# substr at offset 0), which modifies the token in place.
55 substr($token->[0], 0, 0) = "/" if $type eq "E";
# With no remaining arguments, any tag is accepted.
56 return $token unless @_;
# Otherwise the tag name is compared with $_ -- presumably each requested
# tag name from @_ in turn; the enclosing loop is not visible here.
58 return $token if $token->[0] eq $_;
# _textify fragment: computes replacement text for a tag token, based on
# the per-tag "textify" mapping stored on the object.
# NOTE(review): the sub header, the declaration of $text, the condition
# selecting between the two assignments to $text, and the return of $text
# are not visible in this excerpt.
65 my($self, $token) = @_;
# The token still carries its type code at element 0, so the tag name is
# element 1.
66 my $tag = $token->[1];
# Tags without an entry in the mapping produce undef.
67 return undef unless exists $self->{textify}{$tag};
69 my $alt = $self->{textify}{$tag};
# One branch calls $alt as code with all token elements as arguments
# (presumably taken when $alt is a code reference -- confirm).
72 $text = &$alt(@$token);
# The other branch uses $alt as an attribute name, falling back to "alt"
# when $alt is false, and looks it up in element 2 of the token.
74 $text = $token->[2]{$alt || "alt"};
# If that attribute is not defined, use the upper-cased tag name in square
# brackets instead.
75 $text = "[\U$tag]" unless defined $text;
# Text-collecting fragment: reads tokens until get_token returns false.
# NOTE(review): the sub header, the first branch header, the bodies of the
# branches and the final return are not visible in this excerpt.
85 while (my $token = $self->get_token) {
86 my $type = $token->[0];
# Text token: element 1 is the text and element 2 the is_cdata flag.
# Entities are decoded in the copy only when that flag is false.
88 my $text = $token->[1];
89 decode_entities($text) unless $token->[2];
# Start or end tag token.
91 } elsif ($type =~ /^[SE]$/) {
92 my $tag = $token->[1];
# A defined result from _textify means the tag has replacement text.
94 if (defined(my $text = _textify($self, $token))) {
# When no arguments were given, or the tag equals one of the arguments,
# the token is pushed back onto the parser.
101 if (!@_ || grep $_ eq $tag, @_) {
102 $self->unget_token($token);
# Statement modifier for a statement that is not visible here: it applies
# when the tag is "br" or when the tag name has no true entry in
# %HTML::Tagset::isPhraseMarkup.
# NOTE(review): no visible line loads HTML::Tagset -- confirm where that
# hash is populated.
106 if $tag eq "br" || !$HTML::Tagset::isPhraseMarkup{$token->[1]};
# Trimmed-text fragment: calls get_text with the caller's arguments, then
# normalises whitespace in the result.
# NOTE(review): the sub header, the declaration of $self and the return
# statement are not visible in this excerpt.
116 my $text = $self->get_text(@_);
# Remove leading whitespace, remove trailing whitespace, then collapse
# every remaining run of whitespace into a single space.
117 $text =~ s/^\s+//; $text =~ s/\s+$//; $text =~ s/\s+/ /g;
# Phrase-collecting fragment: reads tokens and gathers pieces in @text.
# NOTE(review): the sub header, the declaration of @text, the first branch
# header, the branch bodies and the final return are not visible in this
# excerpt.
124 while (my $token = $self->get_token) {
125 my $type = $token->[0];
# Text token: element 1 is the text and element 2 the is_cdata flag.
# Entities are decoded in the copy only when that flag is false.
127 my $text = $token->[1];
128 decode_entities($text) unless $token->[2];
# Start or end tag token.
130 } elsif ($type =~ /^[SE]$/) {
131 my $tag = $token->[1];
# A defined result from _textify means the tag has replacement text.
133 if (defined(my $text = _textify($self, $token))) {
# A tag with no true entry in %HTML::Tagset::isPhraseMarkup is pushed back
# onto the parser.
138 if (!$HTML::Tagset::isPhraseMarkup{$tag}) {
139 $self->unget_token($token);
# A "br" tag contributes a single space to the collected text.
142 push(@text, " ") if $tag eq "br";
# Concatenate the collected pieces, then strip leading and trailing
# whitespace and collapse inner whitespace runs into single spaces.
145 my $text = join("", @text);
146 $text =~ s/^\s+//; $text =~ s/\s+$//; $text =~ s/\s+/ /g;