Update tests
[catagits/XML-Feed.git] / lib / XML / Feed.pm
CommitLineData
c4d4c98e 1# $Id: Feed.pm 1958 2006-08-14 05:31:27Z btrott $
0d5e38d1 2
3package XML::Feed;
4use strict;
5
62d92771 6use base qw( Class::ErrorHandler );
62d92771 7use Feed::Find;
fe71566d 8use URI::Fetch;
d39809aa 9use LWP::UserAgent;
973e1f9e 10use Carp;
0d5e38d1 11
1d3481db 12our $VERSION = '0.23';
973e1f9e 13
## Create a new, empty feed object of the requested format.
##
##   my $feed = XML::Feed->new($format, @init_args);
##
## $format names a class below XML::Feed::Format:: and defaults to 'Atom'.
## Any remaining arguments are passed on to the new object's init_empty().
##
## Croaks if $format is not a plain class-name fragment or if the format
## class cannot be loaded. If init_empty() returns false, the object's
## errstr is recorded through $class->error and that call's result is
## returned instead of a feed.
sub new {
    my $class  = shift;
    my $format = shift || 'Atom';

    ## $format is interpolated into a string eval below, so refuse anything
    ## that is not a simple (optionally ::-separated) identifier. Without
    ## this check a value such as 'Atom; system(...)' would be executed.
    Carp::croak("Unsupported format $format: not a valid format name")
        unless $format =~ /\A[A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z0-9_]+)*\z/;

    my $format_class = 'XML::Feed::Format::' . $format;
    eval "use $format_class";
    Carp::croak("Unsupported format $format: $@") if $@;

    my $feed = bless {}, $format_class;
    $feed->init_empty(@_) or return $class->error($feed->errstr);
    return $feed;
}
24
## Base implementation of the hook that new() invokes on a freshly blessed
## feed object. It ignores its arguments and always reports success.
sub init_empty {
    return 1;
}
0d5e38d1 26
## Parse a syndication feed and return a feed object blessed into the
## matching XML::Feed::Format::* class.
##
##   my $feed = XML::Feed->parse($stream);
##   my $feed = XML::Feed->parse($stream, $format);
##
## $stream may be a URI object (fetched through URI::Fetch), a reference to
## a scalar holding the XML, any other reference (treated as a readable
## filehandle), or a plain file name.
##
## $format, when given, bypasses identify_format(); it must be a plain
## class-name fragment such as 'Atom' or 'RSS'.
##
## On failure the message is recorded with $class->error and the result of
## that call is returned.
sub parse {
    my $class = shift;
    my($stream, $specified_format) = @_;
    return $class->error("Stream parameter is required") unless $stream;

    my $feed = bless {}, $class;
    my $xml  = '';
    if (UNIVERSAL::isa($stream, 'URI')) {
        my $ua = LWP::UserAgent->new;
        $ua->env_proxy; # force allowing of proxies
        my $res = URI::Fetch->fetch($stream, UserAgent => $ua)
            or return $class->error(URI::Fetch->errstr);
        return $class->error("This feed has been permanently removed")
            if $res->status == URI::Fetch::URI_GONE();
        $xml = $res->content;
    } elsif (ref($stream) eq 'SCALAR') {
        $xml = $$stream;
    } elsif (ref($stream)) {
        ## Any other reference is treated as an open filehandle.
        while (read($stream, my($chunk), 8192)) {
            $xml .= $chunk;
        }
    } else {
        ## Three-argument open: the name is only ever opened for reading,
        ## so a value like 'cmd |' or '>file' can neither run a command
        ## nor clobber a file.
        open my $fh, '<', $stream
            or return $class->error("Can't open $stream: $!");
        my $got;
        while ($got = read $fh, my($chunk), 8192) {
            $xml .= $chunk;
        }
        ## read() returns undef on error and 0 at end of file.
        defined $got
            or return $class->error("Can't read $stream: $!");
        close $fh;
    }
    return $class->error("Can't get feed XML content from $stream")
        unless $xml;

    my $format;
    if ($specified_format) {
        $format = $specified_format;
    } else {
        $format = $feed->identify_format(\$xml)
            or return $class->error($feed->errstr);
    }

    ## $format ends up inside a string eval; only accept a simple
    ## (optionally ::-separated) identifier so it cannot carry code.
    return $class->error("Unsupported format $format: not a valid format name")
        unless $format =~ /\A[A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z0-9_]+)*\z/;

    my $format_class = join '::', __PACKAGE__, "Format", $format;
    eval "use $format_class";
    return $class->error("Unsupported format $format: $@") if $@;

    ## Rebless the placeholder object into the concrete format class and
    ## let that class parse the XML.
    bless $feed, $format_class;
    $feed->init_string(\$xml) or return $class->error($feed->errstr);
    return $feed;
}
71
## Guess the format of a feed from the name of its first element.
##
##   my $format = $feed->identify_format(\$xml);
##   my $format = $feed->identify_format($xml);
##
## $xml is either a reference to the XML text or the text itself.
## Returns 'RSS' when the root element is <rss> or <RDF> (any namespace
## prefix is ignored) and 'Atom' when it is <feed>. Otherwise the failure
## is recorded with $feed->error and the result of that call is returned.
sub identify_format {
    my $feed = shift;
    my($xml) = @_;
    my $xml_ref = ref($xml) ? $xml : \$xml;

    ## Auto-detect feed type based on first element. This is prone
    ## to breakage, but then again we don't want to parse the whole
    ## feed ourselves.
    my $tag;
    ## The first alternative swallows whole comments so that markup inside
    ## a comment is never mistaken for the root element. The second one
    ## captures a tag name up to whitespace or '>', which keeps compact
    ## documents such as '<rss><channel>' working.
    while ($$xml_ref =~ /<!--.*?-->|<([^\s>]+)/sg) {
        next unless defined $1;     # matched a comment
        (my $t = $1) =~ tr/a-zA-Z0-9:\-\?!//cd;
        my $first = substr $t, 0, 1;
        ## Skip processing instructions (<?xml ...?>) and declarations
        ## (<!DOCTYPE ...>).
        next if $first eq '?' || $first eq '!';
        $tag = $t;
        last;
    }
    return $feed->error("Cannot find first element") unless $tag;

    $tag =~ s/^.*://;               # drop any namespace prefix
    if ($tag eq 'rss' || $tag eq 'RDF') {
        return 'RSS';
    } elsif ($tag eq 'feed') {
        return 'Atom';
    } else {
        return $feed->error("Cannot detect feed type");
    }
}
94
## Run feed auto-discovery against $uri via Feed::Find.
##
## Returns the list produced by Feed::Find->find. When that list is empty,
## Feed::Find's errstr is recorded with $class->error and the result of
## that call is returned instead.
sub find_feeds {
    my ($class, $uri) = @_;

    my @found = Feed::Find->find($uri);
    return $class->error(Feed::Find->errstr) unless @found;

    return @found;
}
102
## Build a new feed in $format carrying this feed's data.
##
## A fresh object is created with XML::Feed->new($format). Each feed-level
## field with a defined value is copied across, then every entry is
## converted with its own convert($format) and added. Returns the new feed.
sub convert {
    my ($feed, $format) = @_;

    my $converted = XML::Feed->new($format);

    my @fields = qw( title link description language author copyright modified generator );
    foreach my $name (@fields) {
        my $value = $feed->$name();
        $converted->$name($value) if defined $value;
    }

    foreach my $item ($feed->entries) {
        my $item_copy = $item->convert($format);
        $converted->add_entry($item_copy);
    }

    return $converted;
}
117
## Merge the entries of $other into this feed.
##
## Entries whose id is already present in this feed, or was already added
## earlier in the same call, are skipped.
sub splice {
    my ($feed, $other) = @_;

    ## ids already present in this feed
    my %seen = map { ($_->id, 1) } $feed->entries;

    foreach my $candidate ($other->entries) {
        next if $seen{ $candidate->id }++;
        $feed->add_entry($candidate);
    }
}
126
## Return $entry in a form matching this feed's format.
##
## The format names are derived from the two objects' class names by
## stripping the XML::Feed::Format:: and XML::Feed::Entry::Format::
## prefixes. An entry already in the feed's format is returned unchanged;
## any other entry is returned as $entry->convert($feed_format).
sub _convert_entry {
    my ($feed, $entry) = @_;

    (my $feed_format  = ref $feed)  =~ s!^XML::Feed::Format::!!;
    (my $entry_format = ref $entry) =~ s!^XML::Feed::Entry::Format::!!;

    return $entry_format eq $feed_format
        ? $entry
        : $entry->convert($feed_format);
}
135
## Forward declarations of the feed methods. No bodies are defined for them
## in this file; presumably each XML::Feed::Format::* class supplies the
## implementations (confirm against the format classes).
sub add_entry;
sub as_xml;
sub author;
sub base;
sub copyright;
sub description;
sub entries;
sub format;
sub generator;
sub id;
sub language;
sub link;
sub modified;
sub self_link;
sub title;
0d5e38d1 151
## Alias for description(): forwards every argument to it and returns
## whatever it returns.
sub tagline {
    my $feed = shift;
    return $feed->description(@_);
}
## Synonym for entries(): returns whatever entries() returns for this feed.
sub items {
    my ($feed) = @_;
    return $feed->entries;
}
154
1551;
156__END__
157
158=head1 NAME
159
160XML::Feed - Syndication feed parser and auto-discovery
161
162=head1 SYNOPSIS
163
164 use XML::Feed;
165 my $feed = XML::Feed->parse(URI->new('http://example.com/atom.xml'))
166 or die XML::Feed->errstr;
167 print $feed->title, "\n";
168 for my $entry ($feed->entries) {
169 }
170
171 ## Find all of the syndication feeds on a given page, using
172 ## auto-discovery.
173 my @feeds = XML::Feed->find_feeds('http://example.com/');
174
175=head1 DESCRIPTION
176
177I<XML::Feed> is a syndication feed parser for both RSS and Atom feeds. It
178also implements feed auto-discovery for finding feeds, given a URI.
179
180I<XML::Feed> supports the following syndication feed formats:
181
182=over 4
183
184=item * RSS 0.91
185
186=item * RSS 1.0
187
188=item * RSS 2.0
189
190=item * Atom
191
192=back
193
194The goal of I<XML::Feed> is to provide a unified API for parsing and using
195the various syndication formats. The different flavors of RSS and Atom
196handle data in different ways: date handling; summaries and content;
197escaping and quoting; etc. This module attempts to remove those differences
198by providing a wrapper around the formats and the classes implementing
199those formats (I<XML::RSS> and I<XML::Atom::Feed>). For example, dates are
200handled differently in each of the above formats. To provide a unified API for
201date handling, I<XML::Feed> converts all date formats transparently into
202I<DateTime> objects, which it then returns to the caller.
203
204=head1 USAGE
205
973e1f9e 206=head2 XML::Feed->new($format)
207
208Creates a new empty I<XML::Feed> object using the format I<$format>.
209
813f78d8 210 $feed = XML::Feed->new('Atom');
211 $feed = XML::Feed->new('RSS');
212 $feed = XML::Feed->new('RSS', version => '0.91');
213
0d5e38d1 214=head2 XML::Feed->parse($stream)
215
41e8c132 216=head2 XML::Feed->parse($stream, $format)
217
0d5e38d1 218Parses a syndication feed identified by I<$stream>. I<$stream> can be any
219one of the following:
220
221=over 4
222
223=item * Scalar reference
224
A reference to a string containing the XML body of the feed.
226
227=item * Filehandle
228
229An open filehandle from which the feed XML will be read.
230
231=item * File name
232
233The name of a file containing the feed XML.
234
235=item * URI object
236
237A URI from which the feed XML will be retrieved.
238
239=back
240
8c30ad3d 241I<$format> allows you to override format guessing.
41e8c132 242
0d5e38d1 243=head2 XML::Feed->find_feeds($uri)
244
245Given a URI I<$uri>, use auto-discovery to find all of the feeds linked
246from that page (using I<E<lt>linkE<gt>> tags).
247
248Returns a list of feed URIs.
249
8c30ad3d 250=head2 XML::Feed->identify_format($xml)
251
Given the XML of a feed (as a scalar reference), returns the format it is in (C<Atom> or C<RSS>).
253
973e1f9e 254=head2 $feed->convert($format)
255
256Converts the I<XML::Feed> object into the I<$format> format, and returns
257the new object.
258
23103173 259=head2 $feed->splice($other_feed)
260
261Splices in all of the entries from the feed I<$other_feed> into I<$feed>,
262skipping posts that are already in I<$feed>.
263
0d5e38d1 264=head2 $feed->format
265
266Returns the format of the feed (C<Atom>, or some version of C<RSS>).
267
973e1f9e 268=head2 $feed->title([ $title ])
0d5e38d1 269
270The title of the feed/channel.
271
5383a560 272=head2 $feed->base([ $base ])
273
274The url base of the feed/channel.
275
973e1f9e 276=head2 $feed->link([ $uri ])
0d5e38d1 277
278The permalink of the feed/channel.
279
973e1f9e 280=head2 $feed->tagline([ $tagline ])
0d5e38d1 281
282The description or tagline of the feed/channel.
283
973e1f9e 284=head2 $feed->description([ $description ])
0d5e38d1 285
286Alias for I<$feed-E<gt>tagline>.
287
973e1f9e 288=head2 $feed->author([ $author ])
289
290The author of the feed/channel.
291
292=head2 $feed->language([ $language ])
0d5e38d1 293
294The language of the feed.
295
973e1f9e 296=head2 $feed->copyright([ $copyright ])
0d5e38d1 297
298The copyright notice of the feed.
299
973e1f9e 300=head2 $feed->modified([ $modified ])
0d5e38d1 301
302A I<DateTime> object representing the last-modified date of the feed.
303
973e1f9e 304If present, I<$modified> should be a I<DateTime> object.
305
306=head2 $feed->generator([ $generator ])
0d5e38d1 307
308The generator of the feed.
309
9a36f82c 310=head2 $feed->self_link ([ $uri ])
311
312The Atom Self-link of the feed:
313
314L<http://validator.w3.org/feed/docs/warning/MissingAtomSelfLink.html>
315
316A string.
317
0d5e38d1 318=head2 $feed->entries
319
320A list of the entries/items in the feed. Returns an array containing
321I<XML::Feed::Entry> objects.
322
8c30ad3d 323=head2 $feed->items
324
A synonym for I<$feed-E<gt>entries>.
326
973e1f9e 327=head2 $feed->add_entry($entry)
328
329Adds an entry to the feed. I<$entry> should be an I<XML::Feed::Entry>
330object in the correct format for the feed.
331
332=head2 $feed->as_xml
333
334Returns an XML representation of the feed, in the format determined by
335the current format of the I<$feed> object.
336
337=head1 PACKAGE VARIABLES
338
339=over 4
340
341=item C<$XML::Feed::RSS::PREFERRED_PARSER>
342
If you want to use an RSS parser class other than XML::RSS (the default), you
can change the class by setting the C<$PREFERRED_PARSER> variable in the
XML::Feed::RSS package.
346
347 $XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML";
348
349B<Note:> this will only work for parsing feeds, not creating feeds.
350
351=back
352
b8bc97f3 353=head1 VALID FEEDS
354
For reference, this CGI script will create valid, albeit nonsensical, feeds
(according to C<http://feedvalidator.org> anyway) for Atom 1.0 and RSS 0.90,
0.91, 1.0 and 2.0.
358
359 #!perl -w
360
361 use strict;
362 use CGI;
363 use CGI::Carp qw(fatalsToBrowser);
364 use DateTime;
365 use XML::Feed;
366
367 my $cgi = CGI->new;
368 my @args = ( $cgi->param('format') || "Atom" );
369 push @args, ( version => $cgi->param('version') ) if $cgi->param('version');
370
371 my $feed = XML::Feed->new(@args);
372 $feed->id("http://".time.rand()."/");
373 $feed->title('Test Feed');
374 $feed->link($cgi->url);
375 $feed->self_link($cgi->url( -query => 1, -full => 1, -rewrite => 1) );
376 $feed->modified(DateTime->now);
377
378 my $entry = XML::Feed::Entry->new();
379 $entry->id("http://".time.rand()."/");
380 $entry->link("http://example.com");
381 $entry->title("Test entry");
382 $entry->summary("Test summary");
383 $entry->content("Foo");
384 $entry->modified(DateTime->now);
385 $entry->author('test@example.com (Testy McTesterson)');
386 $feed->add_entry($entry);
387
388 my $mime = ("Atom" eq $feed->format) ? "application/atom+xml" : "application/rss+xml";
389 print $cgi->header($mime);
390 print $feed->as_xml;
391
392
0d5e38d1 393=head1 LICENSE
394
395I<XML::Feed> is free software; you may redistribute it and/or modify it
396under the same terms as Perl itself.
397
398=head1 AUTHOR & COPYRIGHT
399
70f935cc 400Except where otherwise noted, I<XML::Feed> is Copyright 2004-2008
973e1f9e 401Six Apart, cpan@sixapart.com. All rights reserved.
0d5e38d1 402
70f935cc 403=head1 SUBVERSION
404
405The latest version of I<XML::Feed> can be found at
406
407 http://code.sixapart.com/svn/XML-Feed/trunk/
408
0d5e38d1 409=cut