X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FXML%2FFeed.pm;h=b70b1bc41267fe9af289f64946c62af39d1d0ac1;hb=aa2dcd5cde4fe1d636f6228ca2a9580371673087;hp=359730e9e9654abf9daa2edd1531a7e4bf0c4697;hpb=c4d4c98e2bc6450743a68f5014a853960fe62117;p=catagits%2FXML-Feed.git diff --git a/lib/XML/Feed.pm b/lib/XML/Feed.pm index 359730e..b70b1bc 100644 --- a/lib/XML/Feed.pm +++ b/lib/XML/Feed.pm @@ -6,19 +6,26 @@ use strict; use base qw( Class::ErrorHandler ); use Feed::Find; use URI::Fetch; +use LWP::UserAgent; use Carp; - -our $VERSION = '0.12'; +use Module::Pluggable search_path => "XML::Feed::Format", + require => 1, + sub_name => 'formatters'; + +our $VERSION = '0.40'; +our @formatters; +BEGIN { + @formatters = __PACKAGE__->formatters; +} sub new { my $class = shift; - my($format) = @_; - $format ||= 'Atom'; - my $format_class = 'XML::Feed::' . $format; + my $format = shift || 'Atom'; + my $format_class = 'XML::Feed::Format::' . $format; eval "use $format_class"; Carp::croak("Unsupported format $format: $@") if $@; - my $feed = bless {}, join('::', __PACKAGE__, $format); - $feed->init_empty or return $class->error($feed->errstr); + my $feed = bless {}, join('::', __PACKAGE__, "Format", $format); + $feed->init_empty(@_) or return $class->error($feed->errstr); $feed; } @@ -26,12 +33,14 @@ sub init_empty { 1 } sub parse { my $class = shift; - my($stream) = @_; + my($stream, $specified_format) = @_; return $class->error("Stream parameter is required") unless $stream; my $feed = bless {}, $class; my $xml = ''; if (UNIVERSAL::isa($stream, 'URI')) { - my $res = URI::Fetch->fetch($stream) + my $ua = LWP::UserAgent->new; + $ua->env_proxy; # force allowing of proxies + my $res = URI::Fetch->fetch($stream, UserAgent => $ua) or return $class->error(URI::Fetch->errstr); return $class->error("This feed has been permanently removed") if $res->status == URI::Fetch::URI_GONE(); @@ -52,9 +61,14 @@ sub parse { } return $class->error("Can't get feed XML content 
from $stream") unless $xml; - my $format = $feed->identify_format(\$xml) - or return $class->error($feed->errstr); - my $format_class = join '::', __PACKAGE__, $format; + my $format; + if ($specified_format) { + $format = $specified_format; + } else { + $format = $feed->identify_format(\$xml) or return $class->error($feed->errstr); + } + + my $format_class = join '::', __PACKAGE__, "Format", $format; eval "use $format_class"; return $class->error("Unsupported format $format: $@") if $@; bless $feed, $format_class; @@ -63,8 +77,23 @@ sub parse { } sub identify_format { - my $feed = shift; - my($xml) = @_; + my $feed = shift; + my($xml) = @_; + foreach my $class (@formatters) { + my ($name) = ($class =~ m!([^:]+)$!); + # TODO ugly + my $tmp = $$xml; + return $name if eval { $class->identify(\$tmp) }; + return $feed->error($@) if $@; + } + return $feed->error("Cannot detect feed type"); +} + +sub _get_first_tag { + my $class = shift; + my ($xml) = @_; + + ## Auto-detect feed type based on first element. This is prone ## to breakage, but then again we don't want to parse the whole ## feed ourselves. @@ -74,15 +103,9 @@ sub identify_format { my $first = substr $t, 0, 1; $tag = $t, last unless $first eq '?' 
|| $first eq '!'; } - return $feed->error("Cannot find first element") unless $tag; + die ("Cannot find first element") unless $tag; $tag =~ s/^.*://; - if ($tag eq 'rss' || $tag eq 'RDF') { - return 'RSS'; - } elsif ($tag eq 'feed') { - return 'Atom'; - } else { - return $feed->error("Cannot detect feed type"); - } + return $tag; } sub find_feeds { @@ -96,7 +119,7 @@ sub find_feeds { sub convert { my $feed = shift; my($format) = @_; - my $new = __PACKAGE__->new($format); + my $new = XML::Feed->new($format); for my $field (qw( title link description language author copyright modified generator )) { my $val = $feed->$field(); next unless defined $val; @@ -117,9 +140,20 @@ sub splice { } } +sub _convert_entry { + my $feed = shift; + my $entry = shift; + my $feed_format = ref($feed); $feed_format =~ s!^XML::Feed::Format::!!; + my $entry_format = ref($entry); $entry_format =~ s!^XML::Feed::Entry::Format::!!; + return $entry if $entry_format eq $feed_format; + return $entry->convert($feed_format); +} + +sub base; sub format; sub title; sub link; +sub self_link; sub description; sub language; sub author; @@ -129,6 +163,7 @@ sub generator; sub add_entry; sub entries; sub as_xml; +sub id; sub tagline { shift->description(@_) } sub items { $_[0]->entries } @@ -188,8 +223,14 @@ I objects, which it then returns to the caller. Creates a new empty I object using the format I<$format>. + $feed = XML::Feed->new('Atom'); + $feed = XML::Feed->new('RSS'); + $feed = XML::Feed->new('RSS', version => '0.91'); + =head2 XML::Feed->parse($stream) +=head2 XML::Feed->parse($stream, $format) + Parses a syndication feed identified by I<$stream>. I<$stream> can be any one of the following: @@ -213,6 +254,8 @@ A URI from which the feed XML will be retrieved. =back +I<$format> allows you to override format guessing. + =head2 XML::Feed->find_feeds($uri) Given a URI I<$uri>, use auto-discovery to find all of the feeds linked @@ -220,6 +263,10 @@ from that page (using IlinkE> tags). 
Returns a list of feed URIs. +=head2 XML::Feed->identify_format($xml) + +Given the xml of a feed return what format it is in (C<Atom>, or some version of C<RSS>). + =head2 $feed->convert($format) Converts the I<XML::Feed> object into the I<$format> format, and returns @@ -238,6 +285,10 @@ Returns the format of the feed (C<Atom>, or some version of C<RSS>). The title of the feed/channel. +=head2 $feed->base([ $base ]) + +The url base of the feed/channel. + =head2 $feed->link([ $uri ]) The permalink of the feed/channel. @@ -272,11 +323,23 @@ If present, I<$modified> should be a I<DateTime> object. The generator of the feed. +=head2 $feed->self_link ([ $uri ]) + +The Atom Self-link of the feed: + +L<http://validator.w3.org/feed/docs/warning/MissingAtomSelfLink.html> + +A string. + =head2 $feed->entries A list of the entries/items in the feed. Returns an array containing I<XML::Feed::Entry> objects. +=head2 $feed->items + +A synonym for I<$feed->entries>. + =head2 $feed->add_entry($entry) Adds an entry to the feed. I<$entry> should be an I<XML::Feed::Entry> @@ -303,6 +366,46 @@ B<Note:> this will only work for parsing feeds, not creating feeds. =back +=head1 VALID FEEDS + +For reference, this cgi script will create valid, albeit nonsensical feeds +(according to C<http://feedvalidator.org> anyway) for Atom 1.0 and RSS 0.90, +0.91, 1.0 and 2.0.
+ + #!perl -w + + use strict; + use CGI; + use CGI::Carp qw(fatalsToBrowser); + use DateTime; + use XML::Feed; + + my $cgi = CGI->new; + my @args = ( $cgi->param('format') || "Atom" ); + push @args, ( version => $cgi->param('version') ) if $cgi->param('version'); + + my $feed = XML::Feed->new(@args); + $feed->id("http://".time.rand()."/"); + $feed->title('Test Feed'); + $feed->link($cgi->url); + $feed->self_link($cgi->url( -query => 1, -full => 1, -rewrite => 1) ); + $feed->modified(DateTime->now); + + my $entry = XML::Feed::Entry->new(); + $entry->id("http://".time.rand()."/"); + $entry->link("http://example.com"); + $entry->title("Test entry"); + $entry->summary("Test summary"); + $entry->content("Foo"); + $entry->modified(DateTime->now); + $entry->author('test@example.com (Testy McTesterson)'); + $feed->add_entry($entry); + + my $mime = ("Atom" eq $feed->format) ? "application/atom+xml" : "application/rss+xml"; + print $cgi->header($mime); + print $feed->as_xml; + + =head1 LICENSE I<XML::Feed> is free software; you may redistribute it and/or modify it @@ -310,7 +413,13 @@ under the same terms as Perl itself. =head1 AUTHOR & COPYRIGHT -Except where otherwise noted, I<XML::Feed> is Copyright 2004-2005 +Except where otherwise noted, I<XML::Feed> is Copyright 2004-2008 Six Apart, cpan@sixapart.com. All rights reserved. +=head1 SUBVERSION + +The latest version of I<XML::Feed> can be found at + + http://code.sixapart.com/svn/XML-Feed/trunk/ + =cut