Get rid of the inc directory
[catagits/XML-Feed.git] / lib / XML / Feed.pm
CommitLineData
c4d4c98e 1# $Id: Feed.pm 1958 2006-08-14 05:31:27Z btrott $
0d5e38d1 2
3package XML::Feed;
4use strict;
5
62d92771 6use base qw( Class::ErrorHandler );
62d92771 7use Feed::Find;
fe71566d 8use URI::Fetch;
973e1f9e 9use Carp;
0d5e38d1 10
632e1639 11our $VERSION = '0.21';
973e1f9e 12
## Constructs a new, empty feed object of the requested format.
##
## Arguments:
##   $format - feed format name such as 'Atom' or 'RSS'; defaults to
##             'Atom'. Any remaining arguments are passed to init_empty().
##
## The class XML::Feed::<format> is loaded on demand and the new object is
## blessed into it. Croaks when the format name is malformed or its class
## cannot be loaded; returns the result of $class->error(...) when
## init_empty() reports failure.
sub new {
    my $class  = shift;
    my $format = shift || 'Atom';

    ## $format is interpolated into a string eval below, so accept nothing
    ## but plain package-name syntax; anything else could inject code.
    Carp::croak("Unsupported format $format: invalid format name")
        unless $format =~ /\A\w+(?:::\w+)*\z/;

    my $format_class = 'XML::Feed::' . $format;
    eval "use $format_class";
    Carp::croak("Unsupported format $format: $@") if $@;

    my $feed = bless {}, $format_class;
    $feed->init_empty(@_) or return $class->error($feed->errstr);
    return $feed;
}
23
## Default initialisation hook invoked by new() on a freshly blessed feed.
## This base version does nothing and reports success.
sub init_empty {
    return 1;
}
0d5e38d1 25
## Parses a syndication feed and returns an object blessed into the
## matching format class (XML::Feed::<format>).
##
## Arguments:
##   $stream           - a URI object (fetched through URI::Fetch), a
##                       reference to a scalar holding the XML, an open
##                       filehandle, or the name of a file to read.
##   $specified_format - optional format name; when given, format
##                       auto-detection is skipped.
##
## On any failure returns the result of $class->error(...) with a message
## describing what went wrong.
sub parse {
    my $class = shift;
    my($stream, $specified_format) = @_;
    return $class->error("Stream parameter is required") unless $stream;
    my $feed = bless {}, $class;
    my $xml = '';
    if (UNIVERSAL::isa($stream, 'URI')) {
        my $res = URI::Fetch->fetch($stream)
            or return $class->error(URI::Fetch->errstr);
        return $class->error("This feed has been permanently removed")
            if $res->status == URI::Fetch::URI_GONE();
        $xml = $res->content;
    } elsif (ref($stream) eq 'SCALAR') {
        $xml = $$stream;
    } elsif (ref($stream)) {
        ## Any other reference is treated as an open filehandle.
        while (read($stream, my($chunk), 8192)) {
            $xml .= $chunk;
        }
    } else {
        ## Three-argument open: the name is always taken as a literal path
        ## to read, never as a mode prefix or a pipe command, and no
        ## surrounding whitespace is stripped from it.
        open my $fh, '<', $stream
            or return $class->error("Can't open $stream: $!");
        while (read $fh, my($chunk), 8192) {
            $xml .= $chunk;
        }
        close $fh;
    }
    return $class->error("Can't get feed XML content from $stream")
        unless $xml;
    my $format;
    if ($specified_format) {
        $format = $specified_format;
    } else {
        $format = $feed->identify_format(\$xml) or return $class->error($feed->errstr);
    }

    ## $format is interpolated into a string eval below, so accept nothing
    ## but plain package-name syntax; anything else could inject code.
    return $class->error("Unsupported format $format: invalid format name")
        unless $format =~ /\A\w+(?:::\w+)*\z/;

    my $format_class = join '::', __PACKAGE__, $format;
    eval "use $format_class";
    return $class->error("Unsupported format $format: $@") if $@;
    bless $feed, $format_class;
    $feed->init_string(\$xml) or return $class->error($feed->errstr);
    return $feed;
}
68
## Guesses the feed format from the first element of the document.
##
## Arguments:
##   $xml - reference to a string holding the feed XML.
##
## Returns 'RSS' when the first element is <rss> or <RDF>, and 'Atom' when
## it is <feed>; a namespace prefix on the element name is ignored.
## Otherwise returns the result of $feed->error(...).
sub identify_format {
    my $feed = shift;
    my($xml) = @_;
    ## Auto-detect feed type based on first element. This is prone
    ## to breakage, but then again we don't want to parse the whole
    ## feed ourselves.
    my $tag;
    ## Always scan from the top, whatever match position an earlier //g
    ## match left behind on the caller's string.
    pos($$xml) = 0;
    ## A name stops at whitespace or at an angle bracket, so compact markup
    ## such as "<rss><channel>" yields "rss". A comment is consumed whole
    ## so that markup inside it is not mistaken for the first element.
    while ($$xml =~ /<(!--.*?-->|[^\s<>]+)/sg) {
        (my $t = $1) =~ tr/a-zA-Z0-9:\-\?!//cd;
        my $first = substr $t, 0, 1;
        ## Skip processing instructions, declarations and comments.
        $tag = $t, last unless $first eq '?' || $first eq '!';
    }
    ## Leaving the loop early keeps the match position set; clear it so
    ## later matches against the same string are not affected.
    pos($$xml) = undef;
    return $feed->error("Cannot find first element") unless $tag;
    $tag =~ s/^.*://;
    if ($tag eq 'rss' || $tag eq 'RDF') {
        return 'RSS';
    } elsif ($tag eq 'feed') {
        return 'Atom';
    } else {
        return $feed->error("Cannot detect feed type");
    }
}
91
## Runs feed auto-discovery for $uri through Feed::Find.
##
## Returns the list handed back by Feed::Find->find. When that list is
## empty, returns the result of $class->error(...) carrying Feed::Find's
## error string instead.
sub find_feeds {
    my($class, $uri) = @_;
    my @found = Feed::Find->find($uri);
    return $class->error(Feed::Find->errstr) unless @found;
    return @found;
}
99
## Builds a new feed of format $format carrying this feed's data.
##
## Copies every defined feed-level field (title, link, description,
## language, author, copyright, modified, generator) into the new feed,
## then adds each entry after converting it to $format.
##
## Returns the new feed object. When the new feed cannot be constructed,
## returns the result of $feed->error(...) instead.
sub convert {
    my $feed = shift;
    my($format) = @_;
    ## new() hands back $class->error(...) (presumably a false value) when
    ## init_empty() fails; stop here rather than call methods on it.
    my $new = __PACKAGE__->new($format)
        or return $feed->error(__PACKAGE__->errstr);
    for my $field (qw( title link description language author copyright modified generator )) {
        my $val = $feed->$field();
        ## Fields the source feed does not define are left unset.
        next unless defined $val;
        $new->$field($val);
    }
    for my $entry ($feed->entries) {
        $new->add_entry($entry->convert($format));
    }
    return $new;
}
114
## Adds the entries of $other to this feed, skipping any entry whose id
## is already present in this feed or was already added during this call.
##
## Arguments:
##   $other - the feed whose entries are merged in.
sub splice {
    my $feed = shift;
    my($other) = @_;
    my %seen;
    for my $entry ($feed->entries) {
        my $id = $entry->id;
        $seen{$id} = 1 if defined $id;
    }
    for my $entry ($other->entries) {
        my $id = $entry->id;
        ## An entry without an id cannot be recognised as a duplicate, so
        ## it is always added instead of being lumped together with every
        ## other id-less entry.
        next if defined $id && $seen{$id}++;
        $feed->add_entry($entry);
    }
    return;
}
123
## Forward declarations only: none of these methods is defined in this
## file. Presumably each format class (XML::Feed::Atom, XML::Feed::RSS)
## supplies the implementations -- confirm against those classes.

## Feed-level metadata.
sub format;
sub title;
sub link;
sub description;
sub language;
sub author;
sub copyright;
sub modified;
sub generator;

## Entries and serialisation.
sub entries;
sub add_entry;
sub as_xml;
0d5e38d1 136
## Alias for description(): forwards every argument and returns whatever
## description() returns.
sub tagline {
    my $feed = shift;
    return $feed->description(@_);
}
## Alias for entries(): returns whatever entries() returns. Any extra
## arguments are ignored.
sub items {
    my($feed) = @_;
    return $feed->entries;
}
139
1401;
141__END__
142
143=head1 NAME
144
145XML::Feed - Syndication feed parser and auto-discovery
146
147=head1 SYNOPSIS
148
149 use XML::Feed;
150 my $feed = XML::Feed->parse(URI->new('http://example.com/atom.xml'))
151 or die XML::Feed->errstr;
152 print $feed->title, "\n";
153 for my $entry ($feed->entries) {
154 }
155
156 ## Find all of the syndication feeds on a given page, using
157 ## auto-discovery.
158 my @feeds = XML::Feed->find_feeds('http://example.com/');
159
160=head1 DESCRIPTION
161
162I<XML::Feed> is a syndication feed parser for both RSS and Atom feeds. It
163also implements feed auto-discovery for finding feeds, given a URI.
164
165I<XML::Feed> supports the following syndication feed formats:
166
167=over 4
168
169=item * RSS 0.91
170
171=item * RSS 1.0
172
173=item * RSS 2.0
174
175=item * Atom
176
177=back
178
179The goal of I<XML::Feed> is to provide a unified API for parsing and using
180the various syndication formats. The different flavors of RSS and Atom
181handle data in different ways: date handling; summaries and content;
182escaping and quoting; etc. This module attempts to remove those differences
183by providing a wrapper around the formats and the classes implementing
184those formats (I<XML::RSS> and I<XML::Atom::Feed>). For example, dates are
185handled differently in each of the above formats. To provide a unified API for
186date handling, I<XML::Feed> converts all date formats transparently into
187I<DateTime> objects, which it then returns to the caller.
188
189=head1 USAGE
190
973e1f9e 191=head2 XML::Feed->new($format)
192
193Creates a new empty I<XML::Feed> object using the format I<$format>.
194
0d5e38d1 195=head2 XML::Feed->parse($stream)
196
41e8c132 197=head2 XML::Feed->parse($stream, $format)
198
0d5e38d1 199Parses a syndication feed identified by I<$stream>. I<$stream> can be any
200one of the following:
201
202=over 4
203
204=item * Scalar reference
205
A reference to a string containing the XML body of the feed.
207
208=item * Filehandle
209
210An open filehandle from which the feed XML will be read.
211
212=item * File name
213
214The name of a file containing the feed XML.
215
216=item * URI object
217
218A URI from which the feed XML will be retrieved.
219
220=back
221
41e8c132 222C<$format> allows you to override format guessing.
223
0d5e38d1 224=head2 XML::Feed->find_feeds($uri)
225
226Given a URI I<$uri>, use auto-discovery to find all of the feeds linked
227from that page (using I<E<lt>linkE<gt>> tags).
228
229Returns a list of feed URIs.
230
973e1f9e 231=head2 $feed->convert($format)
232
233Converts the I<XML::Feed> object into the I<$format> format, and returns
234the new object.
235
23103173 236=head2 $feed->splice($other_feed)
237
238Splices in all of the entries from the feed I<$other_feed> into I<$feed>,
239skipping posts that are already in I<$feed>.
240
0d5e38d1 241=head2 $feed->format
242
243Returns the format of the feed (C<Atom>, or some version of C<RSS>).
244
973e1f9e 245=head2 $feed->title([ $title ])
0d5e38d1 246
247The title of the feed/channel.
248
973e1f9e 249=head2 $feed->link([ $uri ])
0d5e38d1 250
251The permalink of the feed/channel.
252
973e1f9e 253=head2 $feed->tagline([ $tagline ])
0d5e38d1 254
255The description or tagline of the feed/channel.
256
973e1f9e 257=head2 $feed->description([ $description ])
0d5e38d1 258
259Alias for I<$feed-E<gt>tagline>.
260
973e1f9e 261=head2 $feed->author([ $author ])
262
263The author of the feed/channel.
264
265=head2 $feed->language([ $language ])
0d5e38d1 266
267The language of the feed.
268
973e1f9e 269=head2 $feed->copyright([ $copyright ])
0d5e38d1 270
271The copyright notice of the feed.
272
973e1f9e 273=head2 $feed->modified([ $modified ])
0d5e38d1 274
275A I<DateTime> object representing the last-modified date of the feed.
276
973e1f9e 277If present, I<$modified> should be a I<DateTime> object.
278
279=head2 $feed->generator([ $generator ])
0d5e38d1 280
281The generator of the feed.
282
283=head2 $feed->entries
284
The entries/items in the feed. Returns a list of I<XML::Feed::Entry>
objects.
287
973e1f9e 288=head2 $feed->add_entry($entry)
289
290Adds an entry to the feed. I<$entry> should be an I<XML::Feed::Entry>
291object in the correct format for the feed.
292
293=head2 $feed->as_xml
294
295Returns an XML representation of the feed, in the format determined by
296the current format of the I<$feed> object.
297
298=head1 PACKAGE VARIABLES
299
300=over 4
301
302=item C<$XML::Feed::RSS::PREFERRED_PARSER>
303
If you want to use an RSS parser class other than XML::RSS (the default),
you can change the class by setting the C<$PREFERRED_PARSER> variable in
the XML::Feed::RSS package.
307
308 $XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML";
309
310B<Note:> this will only work for parsing feeds, not creating feeds.
311
312=back
313
0d5e38d1 314=head1 LICENSE
315
316I<XML::Feed> is free software; you may redistribute it and/or modify it
317under the same terms as Perl itself.
318
319=head1 AUTHOR & COPYRIGHT
320
70f935cc 321Except where otherwise noted, I<XML::Feed> is Copyright 2004-2008
973e1f9e 322Six Apart, cpan@sixapart.com. All rights reserved.
0d5e38d1 323
70f935cc 324=head1 SUBVERSION
325
326The latest version of I<XML::Feed> can be found at
327
328 http://code.sixapart.com/svn/XML-Feed/trunk/
329
0d5e38d1 330=cut