Commit | Line | Data |
973e1f9e |
1 | # $Id: Feed.pm 1869 2005-08-10 00:02:25Z btrott $ |
0d5e38d1 |
2 | |
3 | package XML::Feed; |
4 | use strict; |
5 | |
62d92771 |
6 | use base qw( Class::ErrorHandler ); |
62d92771 |
7 | use Feed::Find; |
fe71566d |
8 | use URI::Fetch; |
973e1f9e |
9 | use Carp; |
0d5e38d1 |
10 | |
973e1f9e |
11 | our $VERSION = '0.06'; |
12 | |
# Construct an empty feed object of the requested format.
#
# Arguments:
#   $format - short format name that is appended to "XML::Feed::" to pick
#             the implementation class; defaults to 'Atom'.
#
# Returns the new object, blessed into XML::Feed::$format. If the object's
# init_empty hook returns false, returns the result of $class->error(...)
# carrying the object's error string instead.
#
# Croaks with "Unsupported format ..." when $format is not a plain
# package-name fragment or when the implementation class cannot be loaded.
sub new {
    my $class = shift;
    my ($format) = @_;
    $format ||= 'Atom';

    # $format is interpolated into a string eval below, so refuse anything
    # that is not a bare package-name fragment before it gets that far.
    Carp::croak("Unsupported format $format: invalid format name")
        unless $format =~ /\A\w+(?:::\w+)*\z/;

    my $format_class = join '::', __PACKAGE__, $format;

    # The trailing "1" makes success visible through eval's return value,
    # so the check does not depend on $@ alone.
    eval "use $format_class; 1"
        or Carp::croak("Unsupported format $format: $@");

    my $feed = bless {}, $format_class;
    $feed->init_empty or return $class->error($feed->errstr);
    return $feed;
}
24 | |
# Initialisation hook for an empty feed. new() calls this on each freshly
# blessed object and treats a false return as failure; this base version
# does nothing and reports success.
sub init_empty {
    return 1;
}
0d5e38d1 |
26 | |
# Parse a syndication feed and return an object of the matching format class.
#
# $stream selects where the XML comes from:
#   * a URI object        - fetched with URI::Fetch
#   * a scalar reference  - dereferenced and used as the XML text
#   * any other reference - treated as an open filehandle and read to EOF
#   * a plain string      - treated as the name of a file to read
#
# On success the object is blessed into XML::Feed::<format>, where <format>
# is whatever identify_format() reports, and initialised via init_string().
# On any failure the result of $class->error(...) is returned, carrying a
# message that describes what went wrong.
sub parse {
    my $class = shift;
    my ($stream) = @_;
    return $class->error("Stream parameter is required") unless $stream;
    my $feed = bless {}, $class;
    my $xml = '';
    if (UNIVERSAL::isa($stream, 'URI')) {
        my $res = URI::Fetch->fetch($stream)
            or return $class->error(URI::Fetch->errstr);
        return $class->error("This feed has been permanently removed")
            if $res->status == URI::Fetch::URI_GONE();
        $xml = $res->content;
    } elsif (ref($stream) eq 'SCALAR') {
        $xml = $$stream;
    } elsif (ref($stream)) {
        # Any other reference is assumed to be a readable handle.
        while (read($stream, my ($chunk), 8192)) {
            $xml .= $chunk;
        }
    } else {
        # Three-argument open with an explicit read mode: the name is used
        # verbatim, so a leading ">" or "|" (or a trailing "|") in a
        # caller-supplied name can no longer truncate a file or run a
        # command.
        open my $fh, '<', $stream
            or return $class->error("Can't open $stream: $!");
        while (read $fh, my ($chunk), 8192) {
            $xml .= $chunk;
        }
        close $fh;
    }
    return $class->error("Can't get feed XML content from $stream")
        unless $xml;

    my $format = $feed->identify_format(\$xml)
        or return $class->error($feed->errstr);
    my $format_class = join '::', __PACKAGE__, $format;
    eval "use $format_class";
    return $class->error("Unsupported format $format: $@") if $@;

    # Re-bless the placeholder object into the detected format class before
    # handing it the XML.
    bless $feed, $format_class;
    $feed->init_string(\$xml) or return $class->error($feed->errstr);
    return $feed;
}
64 | |
# Guess the feed format from the name of the document's first element.
#
# Arguments:
#   $xml - reference to a string holding the feed XML.
#
# Returns 'RSS' when the first element is <rss> or <RDF>, and 'Atom' when it
# is <feed>; a namespace prefix on the element name is ignored. Otherwise
# returns the result of $feed->error(...).
#
# This is a heuristic scan rather than an XML parse: it is prone to
# breakage, but avoids parsing the whole feed here.
sub identify_format {
    my $feed = shift;
    my ($xml) = @_;

    # A scalar-context //g match keeps its position on the caller's string.
    # Start from the beginning no matter what matched on it earlier.
    pos($$xml) = undef;

    my $tag;
    # First alternative: swallow a whole <!-- ... --> comment so that
    # tag-like text inside it is never taken for the root element.
    # Second alternative: a tag name, ending at whitespace or at ">" so
    # that "<rss><channel>" yields "rss" and not the run that follows.
    while ($$xml =~ /<(!--.*?-->|[^\s>]+)/sg) {
        (my $t = $1) =~ tr/a-zA-Z0-9:\-\?!//cd;
        my $first = substr $t, 0, 1;
        # Skip processing instructions (<?...) and declarations/comments
        # (<!...).
        next if $first eq '?' || $first eq '!';
        $tag = $t;
        last;
    }

    # Leaving the loop with "last" would otherwise leave pos() set on the
    # caller's string and skew any later //g match against it.
    pos($$xml) = undef;

    return $feed->error("Cannot find first element") unless $tag;

    # Drop a namespace prefix such as "rdf:".
    $tag =~ s/^.*://;
    if ($tag eq 'rss' || $tag eq 'RDF') {
        return 'RSS';
    } elsif ($tag eq 'feed') {
        return 'Atom';
    } else {
        return $feed->error("Cannot detect feed type");
    }
}
87 | |
# Run feed auto-discovery against $uri through Feed::Find.
#
# Returns whatever Feed::Find->find($uri) produced. When that is an empty
# list, returns the result of $class->error(...) carrying Feed::Find's
# error string instead.
sub find_feeds {
    my ($class, $uri) = @_;
    my @found = Feed::Find->find($uri);
    return $class->error(Feed::Find->errstr) unless @found;
    return @found;
}
95 | |
973e1f9e |
# Build a copy of this feed in another format.
#
# Arguments:
#   $format - format name, passed to new() and to each entry's convert().
#
# Creates an empty feed of the target format, copies the feed-level fields
# across through the accessors of the same names, then converts every entry
# and adds it to the new feed.
#
# Returns the new feed object. If the target feed cannot be created,
# returns the result of $feed->error(...) carrying the class-level error
# string.
sub convert {
    my $feed = shift;
    my ($format) = @_;

    # new() can hand back an error result rather than an object; report
    # that instead of calling methods on it.
    my $new = __PACKAGE__->new($format)
        or return $feed->error(__PACKAGE__->errstr);

    # "author" is part of the declared and documented feed API, so it is
    # carried over along with the other feed-level fields.
    for my $field (qw( title link description language author copyright modified generator )) {
        my $value = $feed->$field();
        # Leave a field unset in the copy when the source has no value,
        # rather than invoking the setter with undef.
        next unless defined $value;
        $new->$field($value);
    }

    for my $entry ($feed->entries) {
        $new->add_entry($entry->convert($format));
    }
    return $new;
}
108 | |
0d5e38d1 |
# Forward declarations of the feed API. Only the names are declared here;
# no bodies are defined in this file. Presumably the XML::Feed::<Format>
# classes that new() and parse() bless objects into supply them -- confirm
# against those classes.

# Feed-level metadata.
sub format;
sub title;
sub link;
sub description;
sub author;
sub language;
sub copyright;
sub modified;
sub generator;

# Entries.
sub entries;
sub add_entry;

# Serialisation.
sub as_xml;
0d5e38d1 |
121 | |
973e1f9e |
# Alias for description(): forwards the invocant and every argument to it
# and returns its result.
sub tagline {
    my $feed = shift;
    return $feed->description(@_);
}
0d5e38d1 |
# Alias for entries(): calls it on the invocant with no arguments and
# returns its result. Any extra arguments given to items() are ignored.
sub items {
    my ($feed) = @_;
    return $feed->entries;
}
124 | |
125 | 1; |
126 | __END__ |
127 | |
128 | =head1 NAME |
129 | |
130 | XML::Feed - Syndication feed parser and auto-discovery |
131 | |
132 | =head1 SYNOPSIS |
133 | |
134 | use XML::Feed; |
135 | my $feed = XML::Feed->parse(URI->new('http://example.com/atom.xml')) |
136 | or die XML::Feed->errstr; |
137 | print $feed->title, "\n"; |
138 | for my $entry ($feed->entries) { |
139 | } |
140 | |
141 | ## Find all of the syndication feeds on a given page, using |
142 | ## auto-discovery. |
143 | my @feeds = XML::Feed->find_feeds('http://example.com/'); |
144 | |
145 | =head1 DESCRIPTION |
146 | |
147 | I<XML::Feed> is a syndication feed parser for both RSS and Atom feeds. It |
148 | also implements feed auto-discovery for finding feeds, given a URI. |
149 | |
150 | I<XML::Feed> supports the following syndication feed formats: |
151 | |
152 | =over 4 |
153 | |
154 | =item * RSS 0.91 |
155 | |
156 | =item * RSS 1.0 |
157 | |
158 | =item * RSS 2.0 |
159 | |
160 | =item * Atom |
161 | |
162 | =back |
163 | |
164 | The goal of I<XML::Feed> is to provide a unified API for parsing and using |
165 | the various syndication formats. The different flavors of RSS and Atom |
166 | handle data in different ways: date handling; summaries and content; |
167 | escaping and quoting; etc. This module attempts to remove those differences |
168 | by providing a wrapper around the formats and the classes implementing |
169 | those formats (I<XML::RSS> and I<XML::Atom::Feed>). For example, dates are |
170 | handled differently in each of the above formats. To provide a unified API for |
171 | date handling, I<XML::Feed> converts all date formats transparently into |
172 | I<DateTime> objects, which it then returns to the caller. |
173 | |
174 | =head1 USAGE |
175 | |
973e1f9e |
176 | =head2 XML::Feed->new($format) |
177 | |
178 | Creates a new empty I<XML::Feed> object using the format I<$format>. |
179 | |
0d5e38d1 |
180 | =head2 XML::Feed->parse($stream) |
181 | |
182 | Parses a syndication feed identified by I<$stream>. I<$stream> can be any |
183 | one of the following: |
184 | |
185 | =over 4 |
186 | |
187 | =item * Scalar reference |
188 | |
189 | A reference to a string containing the XML body of the feed.
190 | |
191 | =item * Filehandle |
192 | |
193 | An open filehandle from which the feed XML will be read. |
194 | |
195 | =item * File name |
196 | |
197 | The name of a file containing the feed XML. |
198 | |
199 | =item * URI object |
200 | |
201 | A URI from which the feed XML will be retrieved. |
202 | |
203 | =back |
204 | |
205 | =head2 XML::Feed->find_feeds($uri) |
206 | |
207 | Given a URI I<$uri>, use auto-discovery to find all of the feeds linked |
208 | from that page (using I<E<lt>linkE<gt>> tags). |
209 | |
210 | Returns a list of feed URIs. |
211 | |
973e1f9e |
212 | =head2 $feed->convert($format) |
213 | |
214 | Converts the I<XML::Feed> object into the I<$format> format, and returns |
215 | the new object. |
216 | |
0d5e38d1 |
217 | =head2 $feed->format |
218 | |
219 | Returns the format of the feed (C<Atom>, or some version of C<RSS>). |
220 | |
973e1f9e |
221 | =head2 $feed->title([ $title ]) |
0d5e38d1 |
222 | |
223 | The title of the feed/channel. |
224 | |
973e1f9e |
225 | =head2 $feed->link([ $uri ]) |
0d5e38d1 |
226 | |
227 | The permalink of the feed/channel. |
228 | |
973e1f9e |
229 | =head2 $feed->tagline([ $tagline ]) |
0d5e38d1 |
230 | |
231 | The description or tagline of the feed/channel. |
232 | |
973e1f9e |
233 | =head2 $feed->description([ $description ]) |
0d5e38d1 |
234 | |
235 | Alias for I<$feed-E<gt>tagline>. |
236 | |
973e1f9e |
237 | =head2 $feed->author([ $author ]) |
238 | |
239 | The author of the feed/channel. |
240 | |
241 | =head2 $feed->language([ $language ]) |
0d5e38d1 |
242 | |
243 | The language of the feed. |
244 | |
973e1f9e |
245 | =head2 $feed->copyright([ $copyright ]) |
0d5e38d1 |
246 | |
247 | The copyright notice of the feed. |
248 | |
973e1f9e |
249 | =head2 $feed->modified([ $modified ]) |
0d5e38d1 |
250 | |
251 | A I<DateTime> object representing the last-modified date of the feed. |
252 | |
973e1f9e |
253 | If present, I<$modified> should be a I<DateTime> object. |
254 | |
255 | =head2 $feed->generator([ $generator ]) |
0d5e38d1 |
256 | |
257 | The generator of the feed. |
258 | |
259 | =head2 $feed->entries |
260 | |
261 | A list of the entries/items in the feed. Returns an array containing |
262 | I<XML::Feed::Entry> objects. |
263 | |
973e1f9e |
264 | =head2 $feed->add_entry($entry) |
265 | |
266 | Adds an entry to the feed. I<$entry> should be an I<XML::Feed::Entry> |
267 | object in the correct format for the feed. |
268 | |
269 | =head2 $feed->as_xml |
270 | |
271 | Returns an XML representation of the feed, in the format determined by |
272 | the current format of the I<$feed> object. |
273 | |
274 | =head1 PACKAGE VARIABLES |
275 | |
276 | =over 4 |
277 | |
278 | =item C<$XML::Feed::RSS::PREFERRED_PARSER> |
279 | |
280 | If you want to use an RSS parser class other than XML::RSS (the default), you
281 | can change the class by setting the C<$PREFERRED_PARSER> variable in the
282 | XML::Feed::RSS package.
283 | |
284 | $XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML"; |
285 | |
286 | B<Note:> this will only work for parsing feeds, not creating feeds. |
287 | |
288 | =back |
289 | |
0d5e38d1 |
290 | =head1 LICENSE |
291 | |
292 | I<XML::Feed> is free software; you may redistribute it and/or modify it |
293 | under the same terms as Perl itself. |
294 | |
295 | =head1 AUTHOR & COPYRIGHT |
296 | |
973e1f9e |
297 | Except where otherwise noted, I<XML::Feed> is Copyright 2004-2005 |
298 | Six Apart, cpan@sixapart.com. All rights reserved. |
0d5e38d1 |
299 | |
300 | =cut |