ignore *.swp files
[catagits/XML-Feed.git] / lib / XML / Feed / Format / RSS.pm
CommitLineData
3353d70c 1# $Id$
0d5e38d1 2
729cd7a8 3package XML::Feed::Format::RSS;
0d5e38d1 4use strict;
5
6use base qw( XML::Feed );
0d5e38d1 7use DateTime::Format::Mail;
8use DateTime::Format::W3CDTF;
723eaa15 9use XML::Atom::Util qw(iso2dt);
7b5fccb1 10use XML::Feed::Enclosure;
0d5e38d1 11
973e1f9e 12our $PREFERRED_PARSER = "XML::RSS";
13
9b6bc912 14
# Class method: report whether the given XML looks like an RSS document.
# The decision is based solely on the first tag name reported by
# _get_first_tag(): 'rss' or 'RDF' counts as a match.
sub identify {
    my ($class, $xml) = @_;
    my $root = $class->_get_first_tag($xml);
    return ($root eq 'rss' || $root eq 'RDF');
}
21
# Create an empty RSS document inside the feed object.
#
# %args is handed to the constructor of the class named in
# $PREFERRED_PARSER; 'version' defaults to '2.0' when not supplied (or
# false). The content, dcterms, atom and geo namespaces are registered
# with the new parser object. Dies if the parser class cannot be loaded.
# Returns the feed object.
sub init_empty {
    my ($feed, %args) = @_;
    $args{'version'} = '2.0' unless $args{'version'};

    # Load the parser class at run time; the name is configurable.
    eval "use $PREFERRED_PARSER";
    die $@ if $@;

    $feed->{rss} = $PREFERRED_PARSER->new(%args);

    my @modules = (
        [ "content", 'http://purl.org/rss/1.0/modules/content/' ],
        [ "dcterms", 'http://purl.org/dc/terms/' ],
        [ "atom",    'http://www.w3.org/2005/Atom' ],
        [ "geo",     'http://www.w3.org/2003/01/geo/wgs84_pos#' ],
    );
    for my $module (@modules) {
        my ($prefix, $uri) = @$module;
        $feed->{rss}->add_module(prefix => $prefix, uri => $uri);
    }

    return $feed;
}
33
# Initialise the feed from a reference to a string of XML.
#
# An empty document is always created first via init_empty(). When $str
# is true, the string it references is passed to the parser together
# with the option hashrefs_instead_of_strings, plus
# allow_multiple => ['enclosure'] when $XML::Feed::MULTIPLE_ENCLOSURES
# is set. Returns the feed object.
sub init_string {
    my ($feed, $str) = @_;
    $feed->init_empty;

    my %parse_opts = (hashrefs_instead_of_strings => 1);
    if ($XML::Feed::MULTIPLE_ENCLOSURES) {
        $parse_opts{allow_multiple} = [ 'enclosure' ];
    }

    $feed->{rss}->parse($$str, \%parse_opts) if $str;
    return $feed;
}
47
# Return the format label: 'RSS ' followed by the parser's version field.
sub format {
    my ($feed) = @_;
    return 'RSS ' . $feed->{rss}{'version'};
}
49
## The following elements are the same in all versions of RSS.

# Forward to the parser's channel() for the 'title' element; any extra
# arguments are passed through (presumably to set the value).
sub title {
    my ($feed, @value) = @_;
    return $feed->{rss}->channel('title', @value);
}
# Forward to the parser's channel() for the 'link' element and return the
# result with leading and trailing whitespace removed.
#
# Any extra arguments are passed through to channel(). The trim is only
# attempted on a defined value, so a feed without a <link> yields undef
# without "uninitialized value" warnings (the entry-level link accessor
# guards its trim the same way).
sub link {
    my ($feed, @value) = @_;
    my $link = $feed->{rss}->channel('link', @value);
    if (defined $link) {
        $link =~ s/^\s+//;
        $link =~ s/\s+$//;
    }
    return $link;
}
# Forward to the parser's channel() for the 'description' element; any
# extra arguments are passed through (presumably to set the value).
sub description {
    my ($feed, @value) = @_;
    return $feed->{rss}->channel('description', @value);
}
# Alias for modified(): all arguments are forwarded unchanged.
sub updated {
    my ($feed, @value) = @_;
    return $feed->modified(@value);
}
0d5e38d1 60
# This doesn't exist in RSS, so the feed-level id is always empty
# (undef in scalar context, the empty list in list context).
sub id {
    return;
}
63
## This is RSS 2.0 only--what's the equivalent in RSS 1.0?

# Forward to the parser's channel() for the 'copyright' element; any
# extra arguments are passed through (presumably to set the value).
sub copyright {
    my ($feed, @value) = @_;
    return $feed->{rss}->channel('copyright', @value);
}
0d5e38d1 66
# Get or set the 'xml:base' value stored directly on the parser object.
# With an argument the value is stored and returned; without one the
# current value is returned.
sub base {
    my ($feed, @value) = @_;
    return $feed->{rss}->{'xml:base'} = $value[0] if @value;
    return $feed->{rss}->{'xml:base'};
}
75
## The following all work transparently in any RSS version.

# Get or set the feed language.
# Setting writes both the channel 'language' element and the channel's
# {dc}{language} slot. Reading prefers the 'language' element and falls
# back to {dc}{language}.
sub language {
    my ($feed, @value) = @_;
    my $rss = $feed->{rss};
    if (@value) {
        $rss->channel('language', $value[0]);
        return $rss->channel->{dc}{language} = $value[0];
    }
    return $rss->channel('language')
        || $rss->channel->{dc}{language};
}
87
# Get or set the feed's atom "self" link.
# When a URI is given, the channel's {atom}{link} slot is replaced with a
# hash of rel => "self", href => URI, type => "application/rss+xml".
# In both cases the current {atom}{link} value is returned.
sub self_link {
    my ($feed, @args) = @_;

    if (@args) {
        my $href = $args[0];
        my %self = (
            rel  => "self",
            href => $href,
            type => "application/rss+xml",
        );
        $feed->{rss}->channel->{'atom'}{'link'} = \%self;
    }

    return $feed->{rss}->channel->{'atom'}{'link'};
}
104
105
# Get or set the feed generator.
# Setting writes both the channel 'generator' element and the
# generatorAgent slot under the 'http://webns.net/mvcb/' namespace key.
# Reading prefers 'generator' and falls back to generatorAgent.
sub generator {
    my ($feed, @value) = @_;
    my $rss = $feed->{rss};
    if (@value) {
        $rss->channel('generator', $value[0]);
        return $rss->channel->{'http://webns.net/mvcb/'}{generatorAgent} = $value[0];
    }
    return $rss->channel('generator')
        || $rss->channel->{'http://webns.net/mvcb/'}{generatorAgent};
}
117
# Get or set the feed author.
# Setting writes both the channel 'webMaster' element and the channel's
# {dc}{creator} slot. Reading prefers 'webMaster' and falls back to
# {dc}{creator}.
sub author {
    my ($feed, @value) = @_;
    my $rss = $feed->{rss};
    if (@value) {
        $rss->channel('webMaster', $value[0]);
        return $rss->channel->{dc}{creator} = $value[0];
    }
    return $rss->channel('webMaster')
        || $rss->channel->{dc}{creator};
}
128
# Get or set the feed's modification date.
#
# Setter: takes a datetime object, formats it with DateTime::Format::Mail
# and stores it as the channel 'pubDate'.
#
# Getter: returns the parsed channel 'pubDate' if present, otherwise the
# parsed channel {dc}{date}; surrounding whitespace is stripped before
# parsing. Parse failures are swallowed and yield undef. The pubDate is
# parsed with a loose DateTime::Format::Mail parser, the same way the
# entry-level issued() does, so dates that are not strictly conformant
# are not silently dropped.
sub modified {
    my $rss = shift->{rss};
    if (@_) {
        $rss->channel('pubDate',
            DateTime::Format::Mail->format_datetime($_[0]));
        ## XML::RSS is so weird... if I set this, it will try to use
        ## the value for the lastBuildDate, which I don't want--because
        ## this date is formatted for an RSS 1.0 feed. So it's commented out.
        #$rss->channel->{dc}{date} =
        #    DateTime::Format::W3CDTF->format_datetime($_[0]);
    } else {
        my $date;
        ## Either parser could die if the format is invalid.
        eval {
            if (my $ts = $rss->channel('pubDate')) {
                my $parser = DateTime::Format::Mail->new;
                $parser->loose;
                $ts =~ s/^\s+//;
                $ts =~ s/\s+$//;
                $date = $parser->parse_datetime($ts);
            } elsif ($ts = $rss->channel->{dc}{date}) {
                $ts =~ s/^\s+//;
                $ts =~ s/\s+$//;
                $date = DateTime::Format::W3CDTF->parse_datetime($ts);
            }
        };
        return $date;
    }
}
155
# Return the feed's items, each wrapped as an
# XML::Feed::Entry::Format::RSS object and tagged with the feed's RSS
# version in its {_version} slot.
sub entries {
    my ($feed) = @_;
    my $rss = $feed->{rss};
    my @wrapped;
    foreach my $raw (@{ $rss->{items} }) {
        push @wrapped, XML::Feed::Entry::Format::RSS->wrap($raw);
        # Remember the version so the entry can report its own format.
        $wrapped[-1]->{_version} = $rss->{'version'};
    }
    return @wrapped;
}
165
# Append an entry to the feed.
# Does nothing when no (true) entry is given. Otherwise the entry is
# passed through _convert_entry() and its unwrapped hash is handed to the
# parser's add_item() as a key/value list.
sub add_entry {
    my ($feed, $entry) = @_;
    return unless $entry;

    my $converted = $feed->_convert_entry($entry);
    return $feed->{rss}->add_item(%{ $converted->unwrap });
}
172
# Serialise the feed by calling as_string() on the parser object.
sub as_xml {
    my ($feed) = @_;
    return $feed->{rss}->as_string;
}
174
729cd7a8 175package XML::Feed::Entry::Format::RSS;
0d5e38d1 176use strict;
177
# Return the entry's format label: 'RSS ' followed by the {_version}
# value recorded on the entry.
sub format {
    my ($entry) = @_;
    return 'RSS ' . $entry->{'_version'};
}
179
a749d9b9 180use XML::Feed::Content;
181
0d5e38d1 182use base qw( XML::Feed::Entry );
183
# Reset the entry's underlying item to a fresh empty hash and return it.
sub init_empty {
    my ($entry) = @_;
    return $entry->{entry} = { };
}
185
# Get or set the item's 'xml:base' value. With an argument the value is
# stored and returned; without one the current value is returned.
sub base {
    my ($entry, @value) = @_;
    if (@value) {
        return $entry->{entry}->{'xml:base'} = $value[0];
    }
    return $entry->{entry}->{'xml:base'};
}
190
# Get or set the item's title. With an argument the value is stored and
# returned; without one the current value is returned.
sub title {
    my ($entry, @value) = @_;
    if (@value) {
        return $entry->{entry}{title} = $value[0];
    }
    return $entry->{entry}{title};
}
195
# Get or set the item's link.
# Setting stores the value under both 'link' and 'permaLink'.
# Reading returns the first true value among 'link', 'permaLink' and
# 'guid', with leading and trailing whitespace removed when defined.
sub link {
    my ($entry, @value) = @_;
    if (@value) {
        $entry->{entry}{link} = $value[0];
        ## For RSS 2.0 output from XML::RSS. Sigh.
        return $entry->{entry}{permaLink} = $value[0];
    }

    my $url = $entry->{entry}{link}
           || $entry->{entry}{permaLink}
           || $entry->{entry}{guid};
    if (defined $url) {
        $url =~ s/^\s+//;
        $url =~ s/\s+$//;
    }
    return $url;
}
a749d9b9 213
# Get or set the item's summary.
#
# Setter: stores the argument (or its body, when it is an
# XML::Feed::Content object) as the item's description. If the item has
# neither {content}{encoded} nor an xhtml body, a single space is stored
# as {content}{encoded} so the getter will treat the description as a
# summary.
#
# Getter: returns an XML::Feed::Content of type text/plain whose body is
# the description (only when full content is also present), else the
# Atom-namespace summary, else undef.
sub summary {
    my ($entry, @value) = @_;
    my $item = $entry->{entry};

    unless (@value) {
        ## Some RSS feeds use <description> for a summary, and some use it
        ## for the full content. To avoid returning full content where a
        ## summary is expected, <description> only counts as the summary
        ## when one of the usual full-content elements is present as well.
        my $text;
        if ($item->{description} &&
            ($item->{content}{encoded} ||
             $item->{'http://www.w3.org/1999/xhtml'}{body})) {
            $text = $item->{description};
        }
        ## Blogspot's 'short' RSS feeds put the summary in the Atom
        ## namespace for no obviously good reason.
        elsif ($item->{'http://www.w3.org/2005/Atom'}{summary}) {
            $text = $item->{'http://www.w3.org/2005/Atom'}{summary};
        }
        return XML::Feed::Content->wrap({ type => 'text/plain', body => $text });
    }

    my $new = $value[0];
    $item->{description} = ref($new) eq 'XML::Feed::Content' ? $new->body : $new;

    ## Because of the getter's heuristic, add some dummy content so the
    ## description just stored is recognised as the summary.
    if (!$item->{content}{encoded} &&
        !$item->{'http://www.w3.org/1999/xhtml'}{body}) {
        $item->{content}{encoded} = ' ';
    }
}
0d5e38d1 245
# Get or set the item's full content.
#
# Setter: stores the argument under {content}{encoded}. An
# XML::Feed::Content object is stored as its body, or as a hash of
# 'content' and 'xml:base' when it has a defined base.
#
# Getter: takes the first true value among {content}{encoded}, the xhtml
# body and the description, unpacks the hash form (content + xml:base)
# if that is what was found, and returns it wrapped as an
# XML::Feed::Content of type text/html.
sub content {
    my ($entry, @value) = @_;
    my $item = $entry->{entry};

    if (@value) {
        my $new = $value[0];
        my $encoded = $new;
        if (ref($new) eq 'XML::Feed::Content') {
            $encoded = defined $new->base
                ? { 'content' => $new->body, 'xml:base' => $new->base }
                : $new->body;
        }
        return $item->{content}{encoded} = $encoded;
    }

    my $body = $item->{content}{encoded}
            || $item->{'http://www.w3.org/1999/xhtml'}{body}
            || $item->{description};
    my $base;
    if ('HASH' eq ref($body)) {
        $base = $body->{'xml:base'};
        $body = $body->{content};
    }
    return XML::Feed::Content->wrap({ type => 'text/html', body => $body, base => $base });
}
273
# Get or add categories.
#
# With arguments, the new values are appended to the existing categories
# and the combined list is stored (as separate array copies) under both
# 'category' and {dc}{subject}.
#
# Without arguments, reads 'category' (falling back to {dc}{subject}) and
# returns every value in list context or the first one in scalar context.
sub category {
    my ($entry, @added) = @_;
    my $item = $entry->{entry};

    if (@added) {
        my @all = ($entry->category, @added);
        $item->{category} = [@all];
        return $item->{dc}{subject} = [@all];
    }

    my $stored = $item->{category} || $item->{dc}{subject};
    my @categories;
    if (ref($stored) eq 'ARRAY') {
        @categories = @$stored;
    }
    elsif (defined $stored) {
        @categories = ($stored);
    }
    return wantarray ? @categories : $categories[0];
}
287
# Get or set the item's author.
# Setting stores the value under both {dc}{creator} and 'author'.
# Reading prefers 'author' and falls back to {dc}{creator}.
sub author {
    my ($entry, @value) = @_;
    my $item = $entry->{entry};
    if (@value) {
        return $item->{author} = $item->{dc}{creator} = $value[0];
    }
    return $item->{author} || $item->{dc}{creator};
}
296
## XML::RSS doesn't give us access to the rdf:about for the <item>,
## so we have to fall back to the <link> element in RSS 1.0 feeds.

# Get or set the item's id. Setting stores the value as 'guid'; reading
# returns 'guid' if true, otherwise 'link'.
sub id {
    my ($entry, @value) = @_;
    my $item = $entry->{entry};
    return $item->{guid} = $value[0] if @value;
    return $item->{guid} || $item->{link};
}
307
# Get or set the item's issue date.
#
# Setter: takes a datetime object and stores it twice, W3CDTF-formatted
# under {dc}{date} and mail-formatted under 'pubDate'.
#
# Getter: parses 'pubDate' with a loose DateTime::Format::Mail parser if
# present, otherwise parses {dc}{date} or {dcterms}{date} as W3CDTF.
# Surrounding whitespace is stripped first. Parse failures are swallowed
# and yield undef.
sub issued {
    my ($entry, @value) = @_;
    my $item = $entry->{entry};

    if (@value) {
        my $when = $value[0];
        $item->{dc}{date} = DateTime::Format::W3CDTF->format_datetime($when);
        return $item->{pubDate} = DateTime::Format::Mail->format_datetime($when);
    }

    my $parsed;
    ## Either of these could die if the format is invalid.
    eval {
        if (my $stamp = $item->{pubDate}) {
            my $mail = DateTime::Format::Mail->new;
            $mail->loose;
            $stamp =~ s/^\s+//;
            $stamp =~ s/\s+$//;
            $parsed = $mail->parse_datetime($stamp);
        }
        elsif ($stamp = $item->{dc}{date} or $stamp = $item->{dcterms}{date}) {
            $stamp =~ s/^\s+//;
            $stamp =~ s/\s+$//;
            $parsed = DateTime::Format::W3CDTF->parse_datetime($stamp);
        }
    };
    return $parsed;
}
332
# Get or set the item's modification date.
#
# Setter: takes a datetime object and stores it W3CDTF-formatted under
# {dcterms}{modified}.
#
# Getter: reads {dcterms}{modified}, falling back to the Atom-namespace
# 'updated' value, strips surrounding whitespace and tries to parse it as
# W3CDTF and then with XML::Atom::Util::iso2dt. Each attempt is wrapped
# in eval, so a failed parse falls through rather than dying.
sub modified {
    my ($entry, @value) = @_;
    my $item = $entry->{entry};
    if (@value) {
        $item->{dcterms}{modified} =
            DateTime::Format::W3CDTF->format_datetime($value[0]);
    }
    else {
        if (my $stamp = $item->{dcterms}{modified} ||
                        $item->{'http://www.w3.org/2005/Atom'}{updated}) {
            $stamp =~ s/^\s+//;
            $stamp =~ s/\s+$//;
            my $w3c = eval { DateTime::Format::W3CDTF->parse_datetime($stamp) };
            return $w3c || eval { XML::Atom::Util::iso2dt($stamp) };
        }
    }
}
347
# Get or set the item's latitude, stored under {geo}{lat}.
sub lat {
    my ($entry, @value) = @_;
    my $item = $entry->{entry};
    if (@value) {
        $item->{geo}{lat} = $value[0];
    }
    else {
        return $item->{geo}{lat};
    }
}
356
# Get or set the item's longitude, stored under {geo}{long}.
sub long {
    my ($entry, @value) = @_;
    my $item = $entry->{entry};
    if (@value) {
        $item->{geo}{long} = $value[0];
    }
    else {
        return $item->{geo}{long};
    }
}
365
# Get or add enclosures on the item.
#
# Setter: copies url, type and length from the given enclosure into a
# plain hash. When $XML::Feed::MULTIPLE_ENCLOSURES is set the hash is
# appended to the item's enclosure list; otherwise it replaces the item's
# single enclosure. An item that already holds a single enclosure hash
# (not a list) is first promoted to a one-element list, so appending no
# longer dies with "Not an ARRAY reference".
#
# Getter: wraps each stored enclosure in an XML::Feed::Enclosure object.
# With $XML::Feed::MULTIPLE_ENCLOSURES set it returns all of them,
# otherwise only the last one. Returns nothing when no enclosure is
# stored.
sub enclosure {
    my $entry = shift;

    if (@_) {
        my $enclosure = shift;
        my $val = {
            url    => $enclosure->{url},
            type   => $enclosure->{type},
            length => $enclosure->{length}
        };
        if ($XML::Feed::MULTIPLE_ENCLOSURES) {
            my $existing = $entry->{entry}->{enclosure};
            # A lone enclosure may be stored as a bare hash; make it a
            # list before pushing onto it.
            if (defined $existing && ref $existing ne 'ARRAY') {
                $entry->{entry}->{enclosure} = [ $existing ];
            }
            push @{$entry->{entry}->{enclosure}}, $val;
        } else {
            $entry->{entry}->{enclosure} = $val;
        }
    } else {
        my $tmp = $entry->{entry}->{enclosure};
        if (defined $tmp) {
            my @encs = map { XML::Feed::Enclosure->new($_) }
                (ref $tmp eq 'ARRAY')? @$tmp : ($tmp);
            return ($XML::Feed::MULTIPLE_ENCLOSURES)? @encs : $encs[-1];
        }
        return;
    }
}
9a36f82c 391
0d5e38d1 3921;