3 package XML::Feed::Format::RSS;
6 use base qw( XML::Feed );
7 use DateTime::Format::Mail;
8 use DateTime::Format::W3CDTF;
9 use XML::Atom::Util qw(iso2dt);
10 use XML::Feed::Enclosure;
12 our $PREFERRED_PARSER = "XML::RSS";
18 my $tag = $class->_get_first_tag($xml);
19 return ($tag eq 'rss' || $tag eq 'RDF');
23 my ($feed, %args) = @_;
24 $args{'version'} ||= '2.0';
25 eval "use $PREFERRED_PARSER"; die $@ if $@;
26 $feed->{rss} = $PREFERRED_PARSER->new(%args);
27 $feed->{rss}->add_module(prefix => "content", uri => 'http://purl.org/rss/1.0/modules/content/');
28 $feed->{rss}->add_module(prefix => "dcterms", uri => 'http://purl.org/dc/terms/');
29 $feed->{rss}->add_module(prefix => "atom", uri => 'http://www.w3.org/2005/Atom');
30 $feed->{rss}->add_module(prefix => "geo", uri => 'http://www.w3.org/2003/01/geo/wgs84_pos#');
39 hashrefs_instead_of_strings => 1,
41 $opts->{allow_multiple} = [ 'enclosure' ] if $XML::Feed::MULTIPLE_ENCLOSURES;
43 $feed->{rss}->parse($$str, $opts );
## Describe the feed format as a short label: the literal prefix 'RSS '
## followed by whatever is stored under the 'version' key of the wrapped
## rss object held in $feed->{rss}.
sub format {
    my ($feed) = @_;
    my $version = $feed->{rss}->{'version'};
    return 'RSS ' . $version;
}
50 ## The following elements are the same in all versions of RSS.
## Channel title accessor. Every argument after the invocant is handed
## straight to channel('title', ...) on the wrapped rss object, and the
## result of that call is returned unchanged.
sub title {
    my $feed = shift;
    return $feed->{rss}->channel('title', @_);
}
53 my $link = shift->{rss}->channel('link', @_);
## Channel description accessor. Every argument after the invocant is
## handed straight to channel('description', ...) on the wrapped rss
## object, and the result of that call is returned unchanged.
sub description {
    my $feed = shift;
    return $feed->{rss}->channel('description', @_);
}
## Alias for modified(): calls the invocant's modified method with the
## same arguments and returns whatever that call returns.
sub updated {
    my $feed = shift;
    return $feed->modified(@_);
}
61 # This doesn't exist in RSS
## Channel copyright accessor, backed by the 'copyright' channel field.
## That field is an RSS 2.0 element; what the RSS 1.0 equivalent should
## be is still an open question.
sub copyright {
    my $feed = shift;
    return $feed->{rss}->channel('copyright', @_);
}
70 $feed->{rss}->{'xml:base'} = $_[0];
72 $feed->{rss}->{'xml:base'};
76 ## The following all work transparently in any RSS version.
80 $feed->{rss}->channel('language', $_[0]);
81 $feed->{rss}->channel->{dc}{language} = $_[0];
83 $feed->{rss}->channel('language') ||
84 $feed->{rss}->channel->{dc}{language};
94 $feed->{rss}->channel->{'atom'}{'link'} =
98 type => "application/rss+xml",
102 return $feed->{rss}->channel->{'atom'}{'link'};
109 $feed->{rss}->channel('generator', $_[0]);
110 $feed->{rss}->channel->{'http://webns.net/mvcb/'}{generatorAgent} =
113 $feed->{rss}->channel('generator') ||
114 $feed->{rss}->channel->{'http://webns.net/mvcb/'}{generatorAgent};
121 $feed->{rss}->channel('webMaster', $_[0]);
122 $feed->{rss}->channel->{dc}{creator} = $_[0];
124 $feed->{rss}->channel('webMaster') ||
125 $feed->{rss}->channel->{dc}{creator};
130 my $rss = shift->{rss};
132 $rss->channel('pubDate',
133 DateTime::Format::Mail->format_datetime($_[0]));
134 ## XML::RSS is so weird... if I set this, it will try to use
135 ## the value for the lastBuildDate, which I don't want--because
136 ## this date is formatted for an RSS 1.0 feed. So it's commented out.
137 #$rss->channel->{dc}{date} =
138 # DateTime::Format::W3CDTF->format_datetime($_[0]);
142 if (my $ts = $rss->channel('pubDate')) {
145 $date = DateTime::Format::Mail->parse_datetime($ts);
146 } elsif ($ts = $rss->channel->{dc}{date}) {
149 $date = DateTime::Format::W3CDTF->parse_datetime($ts);
157 my $rss = $_[0]->{rss};
159 for my $item (@{ $rss->{items} }) {
160 push @entries, XML::Feed::Entry::Format::RSS->wrap($item);
161 $entries[-1]->{_version} = $rss->{'version'};
168 my $entry = shift || return;
169 $entry = $feed->_convert_entry($entry);
170 $feed->{rss}->add_item(%{ $entry->unwrap });
## Serialize the feed by returning whatever as_string yields on the
## wrapped rss object.
sub as_xml {
    my ($feed) = @_;
    return $feed->{rss}->as_string;
}
175 package XML::Feed::Entry::Format::RSS;
## Describe the entry format as a short label: the literal prefix 'RSS '
## followed by the value stored on the entry under the '_version' key.
sub format {
    my ($entry) = @_;
    my $version = $entry->{'_version'};
    return 'RSS ' . $version;
}
180 use XML::Feed::Content;
182 use base qw( XML::Feed::Entry );
## Reset the entry to a blank state by pointing its 'entry' slot at a
## fresh, empty hash. The result of that assignment is the return value.
sub init_empty {
    my ($entry) = @_;
    return $entry->{entry} = {};
}
188 @_ ? $entry->{entry}->{'xml:base'} = $_[0] : $entry->{entry}->{'xml:base'};
193 @_ ? $entry->{entry}{title} = $_[0] : $entry->{entry}{title};
199 $entry->{entry}{link} = $_[0];
200 ## For RSS 2.0 output from XML::RSS. Sigh.
201 $entry->{entry}{permaLink} = $_[0];
203 my $link = $entry->{entry}{link} ||
204 $entry->{entry}{permaLink} ||
205 $entry->{entry}{guid};
213 my $item = shift->{entry};
215 $item->{description} = ref($_[0]) eq 'XML::Feed::Content' ?
217 ## Because of the logic below, we need to add some dummy content,
218 ## so that we'll properly recognize the description we enter as
220 if (!$item->{content}{encoded} &&
221 !$item->{'http://www.w3.org/1999/xhtml'}{body}) {
222 $item->{content}{encoded} = ' ';
225 ## Some RSS feeds use <description> for a summary, and some use it
226 ## for the full content. Pretty gross. We don't want to return the
227 ## full content if the caller expects a summary, so the heuristic is:
228 ## if the <entry> contains both a <description> and one of the elements
229 ## typically used for the full content, use <description> as summary.
231 if ($item->{description} &&
232 ($item->{content}{encoded} ||
233 $item->{'http://www.w3.org/1999/xhtml'}{body})) {
234 $txt = $item->{description};
235 ## Blogspot's 'short' RSS feeds do this in the Atom namespace
236 ## for no obviously good reason.
237 } elsif ($item->{'http://www.w3.org/2005/Atom'}{summary}) {
238 $txt = $item->{'http://www.w3.org/2005/Atom'}{summary};
240 XML::Feed::Content->wrap({ type => 'text/plain', body => $txt });
245 my $item = shift->{entry};
248 if (ref($_[0]) eq 'XML::Feed::Content') {
249 if (defined $_[0]->base) {
250 $c = { 'content' => $_[0]->body, 'xml:base' => $_[0]->base };
257 $item->{content}{encoded} = $c;
261 $item->{content}{encoded} ||
262 $item->{'http://www.w3.org/1999/xhtml'}{body} ||
263 $item->{description};
264 if ('HASH' eq ref($body)) {
265 $base = $body->{'xml:base'};
266 $body = $body->{content};
268 XML::Feed::Content->wrap({ type => 'text/html', body => $body, base => $base });
274 my $item = $entry->{entry};
276 my @tmp = ($entry->category, @_);
277 $item->{category} = [@tmp];
278 $item->{dc}{subject} = [@tmp];
280 my $r = $item->{category} || $item->{dc}{subject};
281 my @r = ref($r) eq 'ARRAY' ? @$r : defined $r? ($r) : ();
282 return wantarray? @r : $r[0];
287 my $item = shift->{entry};
289 $item->{author} = $item->{dc}{creator} = $_[0];
291 $item->{author} || $item->{dc}{creator};
295 ## XML::RSS doesn't give us access to the rdf:about for the <item>,
296 ## so we have to fall back to the <link> element in RSS 1.0 feeds.
298 my $item = shift->{entry};
300 $item->{guid} = $_[0];
302 $item->{guid} || $item->{link};
307 my $item = shift->{entry};
309 $item->{dc}{date} = DateTime::Format::W3CDTF->format_datetime($_[0]);
310 $item->{pubDate} = DateTime::Format::Mail->format_datetime($_[0]);
312 ## Either of these could die if the format is invalid.
315 if (my $ts = $item->{pubDate}) {
316 my $parser = DateTime::Format::Mail->new;
320 $date = $parser->parse_datetime($ts);
321 } elsif ($ts = $item->{dc}{date} or $ts = $item->{dcterms}{date}) {
324 $date = DateTime::Format::W3CDTF->parse_datetime($ts);
332 my $item = shift->{entry};
334 $item->{dcterms}{modified} =
335 DateTime::Format::W3CDTF->format_datetime($_[0]);
337 if (my $ts = $item->{dcterms}{modified} ||
338 $item->{'http://www.w3.org/2005/Atom'}{updated}) {
341 return eval { DateTime::Format::W3CDTF->parse_datetime($ts) } || eval { XML::Atom::Util::iso2dt($ts) };
347 my $item = shift->{entry};
349 $item->{geo}{lat} = $_[0];
351 return $item->{geo}{lat};
356 my $item = shift->{entry};
358 $item->{geo}{long} = $_[0];
360 return $item->{geo}{long};
368 my $enclosure = shift;
370 url => $enclosure->{url},
371 type => $enclosure->{type},
372 length => $enclosure->{length}
374 if ($XML::Feed::MULTIPLE_ENCLOSURES) {
375 push @{$entry->{entry}->{enclosure}}, $val;
377 $entry->{entry}->{enclosure} = $val;
380 my $tmp = $entry->{entry}->{enclosure};
382 my @encs = map { XML::Feed::Enclosure->new($_) }
383 (ref $tmp eq 'ARRAY')? @$tmp : ($tmp);
384 return ($XML::Feed::MULTIPLE_ENCLOSURES)? @encs : $encs[-1];