-# $Id: Build.PL,v 1.1.1.1 2004/05/29 17:29:56 btrott Exp $
+# $Id: Build.PL 918 2004-05-29 17:29:55Z btrott $
require 'Makefile.PL';
-# $Id: Changes,v 1.7 2004/10/09 07:02:01 btrott Exp $
+# $Id: Changes 1750 2005-01-01 00:46:40Z btrott $
Revision history for XML::Feed
+0.04 2004.12.31
+ - Use "loose" parsing in DateTime::Format::Mail so that we don't die
+ on invalid RFC-822 dates.
+ - XML::Feed::Entry->link on RSS feeds will now use a <guid> element
+ if a <link> element isn't found.
+ - Switched to using URI::Fetch when fetching feeds. Since we're not
+ storing or caching feeds currently, this basically just buys us
+ GZIP support, but that's something.
+ - Use Class::ErrorHandler instead of XML::Feed::ErrorHandler. Thanks
+ to Tim Appnel for the patch.
+
0.03 2004.10.09
- Fixed bug with feed format detection: properly detect format even in
feeds with <!DOCTYPE> at the top. (Thanks to Alberto Quario for the
name: XML-Feed
-version: 0.03
+version: 0.04
abstract: XML Syndication Feed Support
-author: Benjamin Trott <cpan@stupidfool.org>
+author: Benjamin Trott <ben+cpan@stupidfool.org>
license: perl
distribution_type: module
requires:
Class::ErrorHandler: 0
Feed::Find: 0
+ URI::Fetch: 0
XML::RSS: 1.01
XML::Atom: 0.08
LWP: 0
directory:
- t
- inc
-generated_by: Module::Install version 0.35
+generated_by: Module::Install version 0.36
-# $Id: Makefile.PL,v 1.2 2004/10/04 03:38:11 btrott Exp $
+# $Id: Makefile.PL 942 2004-12-31 23:01:21Z btrott $
use inc::Module::Install;
name('XML-Feed');
abstract('XML Syndication Feed Support');
-author('Benjamin Trott <cpan@stupidfool.org>');
+author('Benjamin Trott <ben+cpan@stupidfool.org>');
version_from('lib/XML/Feed.pm');
license('perl');
no_index(directory => 't');
requires('Class::ErrorHandler');
requires('Feed::Find');
+requires('URI::Fetch');
requires('XML::RSS' => 1.01);
requires('XML::Atom' => 0.08);
requires('LWP');
-$Id: README,v 1.2 2004/10/09 07:06:11 btrott Exp $
+$Id: README 942 2004-12-31 23:01:21Z btrott $
This is XML::Feed, an abstraction above the RSS and Atom syndication
feed formats. It supports both parsing and autodiscovery of feeds.
* DateTime::Format::W3CDTF
* List::Util
* Feed::Find
+ * URI::Fetch
INSTALLATION
% make install
-Benjamin Trott / cpan@stupidfool.org
+Benjamin Trott / ben+cpan@stupidfool.org
#line 1 "inc/Module/Install.pm - /Library/Perl/5.8.1/Module/Install.pm"
-# $File: //depot/cpan/Module-Install/lib/Module/Install.pm $ $Author: autrijus $
-# $Revision: #69 $ $Change: 2301 $ $DateTime: 2004/07/13 07:16:40 $ vim: expandtab shiftwidth=4
-
package Module::Install;
-$VERSION = '0.35';
+$VERSION = '0.36';
die << "." unless $INC{join('/', inc => split(/::/, __PACKAGE__)).'.pm'};
Please invoke ${\__PACKAGE__} with:
@inc::Module::Install::ISA = 'Module::Install';
*inc::Module::Install::VERSION = *VERSION;
-#line 132
+#line 129
sub import {
my $class = shift;
delete $INC{"$self->{path}.pm"};
}
-#line 159
+#line 156
sub autoload {
my $self = shift;
};
}
-#line 184
+#line 181
sub new {
my ($class, %args) = @_;
bless(\%args, $class);
}
-#line 213
+#line 210
sub call {
my $self = shift;
goto &{$obj->can($method)};
}
-#line 228
+#line 225
sub load {
my ($self, $method) = @_;
$obj;
}
-#line 258
+#line 255
sub load_extensions {
my ($self, $path, $top_obj) = @_;
}
}
-#line 282
+#line 279
sub find_extensions {
my ($self, $path) = @_;
__END__
-#line 620
+#line 617
#line 1 "inc/Module/Install/AutoInstall.pm - /Library/Perl/5.8.1/Module/Install/AutoInstall.pm"
-# $File: //depot/cpan/Module-Install/lib/Module/Install/AutoInstall.pm $ $Author: autrijus $
-# $Revision: #13 $ $Change: 1846 $ $DateTime: 2003/12/31 22:57:12 $ vim: expandtab shiftwidth=4
-
package Module::Install::AutoInstall;
use Module::Install::Base; @ISA = qw(Module::Install::Base);
#line 1 "inc/Module/Install/Base.pm - /Library/Perl/5.8.1/Module/Install/Base.pm"
-# $File: //depot/cpan/Module-Install/lib/Module/Install/Base.pm $ $Author: autrijus $
-# $Revision: #10 $ $Change: 1847 $ $DateTime: 2003/12/31 23:14:54 $ vim: expandtab shiftwidth=4
-
package Module::Install::Base;
-#line 31
+#line 28
sub new {
my ($class, %args) = @_;
bless(\%args, $class);
}
-#line 49
+#line 46
sub AUTOLOAD {
my $self = shift;
goto &{$self->_top->autoload};
}
-#line 60
+#line 57
sub _top { $_[0]->{_top} }
-#line 71
+#line 68
sub admin {
my $self = shift;
__END__
-#line 115
+#line 112
#line 1 "inc/Module/Install/Build.pm - /Library/Perl/5.8.1/Module/Install/Build.pm"
-# $File: //depot/cpan/Module-Install/lib/Module/Install/Build.pm $ $Author: ingy $
-# $Revision: #23 $ $Change: 1255 $ $DateTime: 2003/03/05 13:23:32 $ vim: expandtab shiftwidth=4
-
package Module::Install::Build;
$VERSION = '0.01';
use strict;
__END__
-#line 178
+#line 175
#line 1 "inc/Module/Install/Can.pm - /Library/Perl/5.8.1/Module/Install/Can.pm"
-# $File: //depot/cpan/Module-Install/lib/Module/Install/Can.pm $ $Author: autrijus $
-# $Revision: #6 $ $Change: 1840 $ $DateTime: 2003/12/28 19:42:02 $ vim: expandtab shiftwidth=4
-
package Module::Install::Can;
use Module::Install::Base; @ISA = qw(Module::Install::Base);
$VERSION = '0.01';
#line 1 "inc/Module/Install/Fetch.pm - /Library/Perl/5.8.1/Module/Install/Fetch.pm"
-# $File: //depot/cpan/Module-Install/lib/Module/Install/Fetch.pm $ $Author: autrijus $
-# $Revision: #8 $ $Change: 1374 $ $DateTime: 2003/03/18 11:50:15 $ vim: expandtab shiftwidth=4
-
package Module::Install::Fetch;
use Module::Install::Base; @ISA = qw(Module::Install::Base);
#line 1 "inc/Module/Install/Include.pm - /Library/Perl/5.8.1/Module/Install/Include.pm"
-# $File: //depot/cpan/Module-Install/lib/Module/Install/Include.pm $ $Author: autrijus $
-# $Revision: #9 $ $Change: 2288 $ $DateTime: 2004/07/01 04:49:12 $ vim: expandtab shiftwidth=4
-
package Module::Install::Include;
use Module::Install::Base; @ISA = qw(Module::Install::Base);
#line 1 "inc/Module/Install/Makefile.pm - /Library/Perl/5.8.1/Module/Install/Makefile.pm"
-# $File: //depot/cpan/Module-Install/lib/Module/Install/Makefile.pm $ $Author: autrijus $
-# $Revision: #53 $ $Change: 1847 $ $DateTime: 2003/12/31 23:14:54 $ vim: expandtab shiftwidth=4
-
package Module::Install::Makefile;
use Module::Install::Base; @ISA = qw(Module::Install::Base);
__END__
-#line 276
+#line 273
#line 1 "inc/Module/Install/Metadata.pm - /Library/Perl/5.8.1/Module/Install/Metadata.pm"
-# $File: //depot/cpan/Module-Install/lib/Module/Install/Metadata.pm $ $Author: autrijus $
-# $Revision: #32 $ $Change: 1885 $ $DateTime: 2004/03/11 05:55:27 $ vim: expandtab shiftwidth=4
-
package Module::Install::Metadata;
use Module::Install::Base; @ISA = qw(Module::Install::Base);
#line 1 "inc/Module/Install/Win32.pm - /Library/Perl/5.8.1/Module/Install/Win32.pm"
-# $File: //depot/cpan/Module-Install/lib/Module/Install/Win32.pm $ $Author: autrijus $
-# $Revision: #9 $ $Change: 1789 $ $DateTime: 2003/11/11 01:22:54 $ vim: expandtab shiftwidth=4
-
package Module::Install::Win32;
use Module::Install::Base; @ISA = qw(Module::Install::Base);
#line 1 "inc/Module/Install/WriteAll.pm - /Library/Perl/5.8.1/Module/Install/WriteAll.pm"
-# $File: //depot/cpan/Module-Install/lib/Module/Install/WriteAll.pm $ $Author: autrijus $
-# $Revision: #3 $ $Change: 1885 $ $DateTime: 2004/03/11 05:55:27 $ vim: expandtab shiftwidth=4
-
package Module::Install::WriteAll;
use Module::Install::Base; @ISA = qw(Module::Install::Base);
-# $Id: Feed.pm,v 1.10 2004/10/09 07:05:08 btrott Exp $
+# $Id: Feed.pm 942 2004-12-31 23:01:21Z btrott $
package XML::Feed;
use strict;
use base qw( Class::ErrorHandler );
-use LWP::UserAgent;
-use HTML::Parser;
use Feed::Find;
+use URI::Fetch;
-use vars qw( $VERSION );
-$VERSION = '0.03';
+our $VERSION = '0.04';
sub parse {
my $class = shift;
my $feed = bless {}, $class;
my $xml = '';
if (UNIVERSAL::isa($stream, 'URI')) {
- my $ua = LWP::UserAgent->new;
- my $req = HTTP::Request->new(GET => $stream);
- my $res = $ua->request($req);
- if ($res->is_success) {
- $xml = $res->content;
- }
+ # Fetch the feed through URI::Fetch; a false return means the fetch
+ # failed, and the reason is reported via URI::Fetch->errstr.
+ my $res = URI::Fetch->fetch($stream)
+ or return $class->error(URI::Fetch->errstr);
+ # URI::Fetch names its status constants URI_*; calling the nonexistent
+ # FEED_GONE() would die with "Undefined subroutine" at this point.
+ return $class->error("This feed has been permanently removed")
+ if $res->status == URI::Fetch::URI_GONE();
+ $xml = $res->content;
} elsif (ref($stream) eq 'SCALAR') {
$xml = $$stream;
} elsif (ref($stream)) {
}
return $class->error("Can't get feed XML content from $stream")
unless $xml;
+ my $format = $feed->identify_format(\$xml)
+ or return $class->error($feed->errstr);
+ my $format_class = join '::', __PACKAGE__, $format;
+ eval "use $format_class";
+ return $class->error("Unsupported format $format: $@") if $@;
+ bless $feed, $format_class;
+ $feed->init_string(\$xml) or return $class->error($feed->errstr);
+ $feed;
+}
+
+sub identify_format {
+ my $feed = shift;
+ my($xml) = @_;
## Auto-detect feed type based on first element. This is prone
## to breakage, but then again we don't want to parse the whole
## feed ourselves.
my $tag;
- while ($xml =~ /<(\S+)/sg) {
+ while ($$xml =~ /<(\S+)/sg) {
(my $t = $1) =~ tr/a-zA-Z0-9:\-\?!//cd;
my $first = substr $t, 0, 1;
$tag = $t, last unless $first eq '?' || $first eq '!';
}
- return $class->error("Cannot find first element") unless $tag;
+ return $feed->error("Cannot find first element") unless $tag;
$tag =~ s/^.*://;
if ($tag eq 'rss' || $tag eq 'RDF') {
- require XML::Feed::RSS;
- bless $feed, 'XML::Feed::RSS';
+ return 'RSS';
} elsif ($tag eq 'feed') {
- require XML::Feed::Atom;
- bless $feed, 'XML::Feed::Atom';
+ return 'Atom';
} else {
- return $class->error("Cannot detect feed type");
+ return $feed->error("Cannot detect feed type");
}
- $feed->init_string($xml) or return;
- $feed;
}
sub find_feeds {
=head1 AUTHOR & COPYRIGHT
Except where otherwise noted, I<XML::Feed> is Copyright 2004 Benjamin
-Trott, cpan@stupidfool.org. All rights reserved.
+Trott, ben+cpan@stupidfool.org. All rights reserved.
=cut
-# $Id: Atom.pm,v 1.2 2004/06/20 15:20:37 btrott Exp $
+# $Id: Atom.pm 942 2004-12-31 23:01:21Z btrott $
package XML::Feed::Atom;
use strict;
sub init_string {
my $feed = shift;
my($str) = @_;
- $feed->{atom} = XML::Atom::Feed->new(Stream => \$str)
- or return $feed->error(XML::Atom::Feed->errstr);
+ if ($str) {
+ $feed->{atom} = XML::Atom::Feed->new(Stream => $str)
+ or return $feed->error(XML::Atom::Feed->errstr);
+ }
$feed;
}
-# $Id: Content.pm,v 1.2 2004/10/04 03:38:11 btrott Exp $
+# $Id: Content.pm 937 2004-10-04 03:38:11Z btrott $
package XML::Feed::Content;
use strict;
-# $Id: Entry.pm,v 1.3 2004/07/29 16:42:29 btrott Exp $
+# $Id: Entry.pm 942 2004-12-31 23:01:21Z btrott $
package XML::Feed::Entry;
use strict;
=head2 $entry->link
The permalink of the entry, in most cases, except in cases where it points
-instead of an offsite URI referenced in the entry.
+instead to an offsite URI referenced in the entry.
=head2 $entry->content
-# $Id: RSS.pm,v 1.5 2004/07/29 16:42:29 btrott Exp $
+# $Id: RSS.pm 942 2004-12-31 23:01:21Z btrott $
package XML::Feed::RSS;
use strict;
my($str) = @_;
my $rss = $feed->{rss} = XML::RSS->new;
if ($str) {
- $rss->parse($str);
+ $rss->parse($$str);
}
$feed;
}
use base qw( XML::Feed::Entry );
sub title { $_[0]->{entry}{title} }
-sub link { $_[0]->{entry}{link} }
+sub link { $_[0]->{entry}{link} || $_[0]->{entry}{guid} }
sub summary {
my $item = $_[0]->{entry};
sub issued {
if (my $ts = $_[0]->{entry}{pubDate}) {
- return DateTime::Format::Mail->parse_datetime($ts);
+ my $parser = DateTime::Format::Mail->new;
+ $parser->loose;
+ return $parser->parse_datetime($ts);
} elsif ($ts = $_[0]->{entry}{dc}{date}) {
return DateTime::Format::W3CDTF->parse_datetime($ts);
}
-# $Id: 00-compile.t,v 1.1 2004/05/29 18:19:50 btrott Exp $
+# $Id: 00-compile.t 922 2004-05-29 18:19:50Z btrott $
my $loaded;
BEGIN { print "1..1\n" }
-# $Id: 01-parse.t,v 1.5 2004/07/29 16:43:33 btrott Exp $
+# $Id: 01-parse.t 933 2004-07-29 16:43:33Z btrott $
use strict;
use Test;