Commit | Line | Data |
3353d70c |
1 | # $Id$ |
0d5e38d1 |
2 | |
729cd7a8 |
3 | package XML::Feed::Format::RSS; |
0d5e38d1 |
4 | use strict; |
5 | |
6 | use base qw( XML::Feed ); |
0d5e38d1 |
7 | use DateTime::Format::Mail; |
8 | use DateTime::Format::W3CDTF; |
723eaa15 |
9 | use XML::Atom::Util qw(iso2dt); |
0d5e38d1 |
10 | |
973e1f9e |
11 | our $PREFERRED_PARSER = "XML::RSS"; |
12 | |
9b6bc912 |
13 | |
## Class method.  Returns true when the first tag found in $xml (as
## reported by _get_first_tag) is either 'rss' or 'RDF'.
sub identify {
    my ($class, $xml) = @_;
    my $first = $class->_get_first_tag($xml);
    return 1 if $first eq 'rss';
    return $first eq 'RDF';
}
20 | |
973e1f9e |
## Prepare an empty feed.  Loads the class named in $PREFERRED_PARSER,
## builds a parser object from %args (with 'version' defaulting to '2.0'),
## stores it in $feed->{rss} and registers the content, dcterms, atom and
## geo namespaces on it.  Dies if the parser class cannot be loaded.
## Returns the feed.
sub init_empty {
    my $feed = shift;
    my %args = @_;
    $args{'version'} = '2.0' unless $args{'version'};

    eval "use $PREFERRED_PARSER";
    die $@ if $@;

    my $rss = $feed->{rss} = $PREFERRED_PARSER->new(%args);

    my @modules = (
        [ "content", 'http://purl.org/rss/1.0/modules/content/' ],
        [ "dcterms", 'http://purl.org/dc/terms/' ],
        [ "atom",    'http://www.w3.org/2005/Atom' ],
        [ "geo",     'http://www.w3.org/2003/01/geo/wgs84_pos#' ],
    );
    for my $module (@modules) {
        my ($prefix, $uri) = @$module;
        $rss->add_module(prefix => $prefix, uri => $uri);
    }

    return $feed;
}
32 | |
0d5e38d1 |
## Initialise the feed from XML text.  $str is a reference to the XML
## string; when it is false nothing is parsed and the feed stays empty.
## The parser is asked for hashrefs instead of strings, and for multiple
## <enclosure> elements when $XML::Feed::MULTIPLE_ENCLOSURES is set.
## Returns the feed.
sub init_string {
    my ($feed, $str) = @_;
    $feed->init_empty;

    my %parse_opts = (hashrefs_instead_of_strings => 1);
    if ($XML::Feed::MULTIPLE_ENCLOSURES) {
        $parse_opts{allow_multiple} = [ 'enclosure' ];
    }

    $feed->{rss}->parse($$str, \%parse_opts) if $str;
    return $feed;
}
46 | |
## Format name of the feed: 'RSS ' followed by the parser's version field.
sub format {
    my ($feed) = @_;
    my $version = $feed->{rss}{'version'};
    return 'RSS ' . $version;
}
48 | |
49 | ## The following elements are the same in all versions of RSS. |
973e1f9e |
## Get or set the channel <title>; extra arguments are passed to channel().
sub title {
    my $feed = shift;
    return $feed->{rss}->channel('title', @_);
}
## Get or set the channel <link>; extra arguments are passed to channel().
sub link {
    my $feed = shift;
    return $feed->{rss}->channel('link', @_);
}
## Get or set the channel <description>; extra arguments are passed to
## channel().
sub description {
    my $feed = shift;
    return $feed->{rss}->channel('description', @_);
}
e23387f1 |
## Alias: forwards all arguments to the feed's modified() method.
sub updated {
    my $feed = shift;
    return $feed->modified(@_);
}
0d5e38d1 |
54 | |
813f78d8 |
55 | # This doesn't exist in RSS |
## Deliberate no-op: always returns nothing, whatever the arguments.
sub id {
    return;
}
57 | |
0d5e38d1 |
58 | ## This is RSS 2.0 only--what's the equivalent in RSS 1.0? |
973e1f9e |
## Get or set the channel <copyright>; extra arguments are passed to
## channel().
sub copyright {
    my $feed = shift;
    return $feed->{rss}->channel('copyright', @_);
}
0d5e38d1 |
60 | |
5383a560 |
## Get or set the feed's xml:base, kept directly on the parser object
## under the 'xml:base' key.
sub base {
    my ($feed, @new) = @_;
    return $feed->{rss}{'xml:base'} unless @new;
    return $feed->{rss}{'xml:base'} = $new[0];
}
69 | |
0d5e38d1 |
70 | ## The following all work transparently in any RSS version. |
## Get or set the feed language.  The setter writes both the channel's
## 'language' element and its dc 'language' field; the getter prefers the
## former and falls back to the latter.
sub language {
    my $feed = shift;
    my $rss  = $feed->{rss};

    unless (@_) {
        return $rss->channel('language') || $rss->channel->{dc}{language};
    }

    my $lang = $_[0];
    $rss->channel('language', $lang);
    return $rss->channel->{dc}{language} = $lang;
}
81 | |
9a36f82c |
## Get or set the feed's own URL, stored as the channel's atom 'link'
## entry.  With a URI argument a new { rel, href, type } hash is stored
## first; in both cases the currently stored value is returned.
sub self_link {
    my ($feed, @args) = @_;
    my $rss = $feed->{rss};

    if (@args) {
        my %self = (
            rel  => "self",
            href => $args[0],
            type => "application/rss+xml",
        );
        $rss->channel->{'atom'}{'link'} = \%self;
    }

    return $rss->channel->{'atom'}{'link'};
}
98 | |
99 | |
0d5e38d1 |
## Get or set the generator.  The setter writes both the channel's
## 'generator' element and the generatorAgent field under the
## 'http://webns.net/mvcb/' key; the getter prefers the former and falls
## back to the latter.
sub generator {
    my $feed = shift;
    my $rss  = $feed->{rss};

    unless (@_) {
        return $rss->channel('generator')
            || $rss->channel->{'http://webns.net/mvcb/'}{generatorAgent};
    }

    my $agent = $_[0];
    $rss->channel('generator', $agent);
    return $rss->channel->{'http://webns.net/mvcb/'}{generatorAgent} = $agent;
}
111 | |
## Get or set the feed author.  The setter writes both the channel's
## 'webMaster' element and its dc 'creator' field; the getter prefers the
## former and falls back to the latter.
sub author {
    my $feed = shift;
    my $rss  = $feed->{rss};

    unless (@_) {
        return $rss->channel('webMaster') || $rss->channel->{dc}{creator};
    }

    my $who = $_[0];
    $rss->channel('webMaster', $who);
    return $rss->channel->{dc}{creator} = $who;
}
122 | |
## Get or set the feed's modification date.
##
## Setter: takes a DateTime object and stores it, formatted by
## DateTime::Format::Mail, as the channel's pubDate.
##
## Getter: returns the parsed date, or undef when no date is present or
## none can be parsed.  The channel's pubDate is tried first, with
## surrounding whitespace removed and the mail parser in loose mode (as
## the entry-level issued() does).  If it is missing or unparseable, the
## channel's dc date is tried as W3CDTF.  Each parse runs in its own eval
## so that a malformed pubDate no longer hides a valid dc date.
sub modified {
    my $rss = shift->{rss};
    if (@_) {
        ## XML::RSS is so weird... if dc:date were set as well, it would
        ## try to use that value for the lastBuildDate, which is not
        ## wanted--because that date is formatted for an RSS 1.0 feed.
        ## So only pubDate is written here.
        return $rss->channel('pubDate',
            DateTime::Format::Mail->format_datetime($_[0]));
    }

    my $date;
    if (my $ts = $rss->channel('pubDate')) {
        $ts =~ s/\A\s+//;
        $ts =~ s/\s+\z//;
        $date = eval {
            my $parser = DateTime::Format::Mail->new;
            $parser->loose;
            $parser->parse_datetime($ts);
        };
    }
    unless ($date) {
        my $ts = $rss->channel->{dc}{date};
        $date = eval { DateTime::Format::W3CDTF->parse_datetime($ts) } if $ts;
    }
    return $date;
}
145 | |
## Returns the feed's items, each wrapped as an
## XML::Feed::Entry::Format::RSS object tagged with the feed's RSS
## version in its _version field.
sub entries {
    my ($feed) = @_;
    my $rss = $feed->{rss};

    my @wrapped;
    foreach my $raw (@{ $rss->{items} }) {
        my $entry = XML::Feed::Entry::Format::RSS->wrap($raw);
        $entry->{_version} = $rss->{'version'};
        push @wrapped, $entry;
    }
    return @wrapped;
}
155 | |
973e1f9e |
## Append an entry to the feed.  Does nothing when no entry is given.
## The entry is first passed through _convert_entry, then its unwrapped
## hash is handed to the parser's add_item as a key/value list.
sub add_entry {
    my ($feed, $entry) = @_;
    return unless $entry;

    my $converted = $feed->_convert_entry($entry);
    return $feed->{rss}->add_item(%{ $converted->unwrap });
}
162 | |
## Serialise the feed by calling as_string on the underlying parser.
sub as_xml {
    my ($feed) = @_;
    return $feed->{rss}->as_string;
}
164 | |
729cd7a8 |
165 | package XML::Feed::Entry::Format::RSS; |
0d5e38d1 |
166 | use strict; |
167 | |
3bdbab6f |
## Format name of the entry: 'RSS ' followed by its _version field.
sub format {
    my ($entry) = @_;
    return 'RSS ' . $entry->{'_version'};
}
169 | |
a749d9b9 |
170 | use XML::Feed::Content; |
171 | |
0d5e38d1 |
172 | use base qw( XML::Feed::Entry ); |
173 | |
973e1f9e |
## Give the entry a fresh, empty item hash; returns that hash.
sub init_empty {
    my ($entry) = @_;
    return $entry->{entry} = {};
}
175 | |
5383a560 |
## Get or set the entry's xml:base, kept in the item hash under the
## 'xml:base' key.
sub base {
    my $entry = shift;
    if (@_) {
        return $entry->{entry}{'xml:base'} = $_[0];
    }
    return $entry->{entry}{'xml:base'};
}
180 | |
973e1f9e |
## Get or set the entry title, kept in the item hash under 'title'.
sub title {
    my $entry = shift;
    if (@_) {
        return $entry->{entry}{title} = $_[0];
    }
    return $entry->{entry}{title};
}
185 | |
## Get or set the entry link.  The setter stores the value as both 'link'
## and 'permaLink' (the latter for RSS 2.0 output from XML::RSS).  The
## getter returns 'link', falling back to 'guid'.
sub link {
    my ($entry, @new) = @_;

    unless (@new) {
        return $entry->{entry}{link} || $entry->{entry}{guid};
    }

    $entry->{entry}{link} = $new[0];
    return $entry->{entry}{permaLink} = $new[0];
}
a749d9b9 |
196 | |
## Get or set the entry summary.
##
## Setter: stores the value (or its body, when it is an
## XML::Feed::Content object) as the item's description.
##
## Getter: returns an XML::Feed::Content of type text/plain whose body is
## the summary text, or undef when none is recognised.
sub summary {
    my $item = shift->{entry};

    unless (@_) {
        ## Some RSS feeds use <description> for a summary, and some use
        ## it for the full content.  To avoid returning the full content
        ## as a summary, <description> only counts as the summary when
        ## the item also carries one of the elements typically used for
        ## the full content.
        my $text;
        if ($item->{description}
            && ($item->{content}{encoded}
                || $item->{'http://www.w3.org/1999/xhtml'}{body})) {
            $text = $item->{description};
        }
        ## Blogspot's 'short' RSS feeds put the summary in the Atom
        ## namespace instead.
        elsif ($item->{'http://www.w3.org/2005/Atom'}{summary}) {
            $text = $item->{'http://www.w3.org/2005/Atom'}{summary};
        }
        return XML::Feed::Content->wrap({ type => 'text/plain', body => $text });
    }

    my $new = $_[0];
    $item->{description} =
        ref($new) eq 'XML::Feed::Content' ? $new->body : $new;

    ## The getter only treats <description> as the summary when full
    ## content is present too, so add some dummy content if there is none.
    if (!$item->{content}{encoded} &&
        !$item->{'http://www.w3.org/1999/xhtml'}{body}) {
        $item->{content}{encoded} = ' ';
    }
}
0d5e38d1 |
228 | |
## Get or set the entry's full content.
##
## Setter: stores the value under content/encoded.  An XML::Feed::Content
## object is stored as its body, or as a { content, xml:base } hash when
## it has a defined base.
##
## Getter: returns an XML::Feed::Content of type text/html built from
## the first true value among content/encoded, the xhtml body and the
## description.  A hash value is split into its content and xml:base.
sub content {
    my $item = shift->{entry};

    unless (@_) {
        my $body = $item->{content}{encoded}
                || $item->{'http://www.w3.org/1999/xhtml'}{body}
                || $item->{description};
        my $base;
        if (ref($body) eq 'HASH') {
            ($base, $body) = @{$body}{ 'xml:base', 'content' };
        }
        return XML::Feed::Content->wrap(
            { type => 'text/html', body => $body, base => $base });
    }

    my $value  = $_[0];
    my $stored = $value;
    if (ref($value) eq 'XML::Feed::Content') {
        $stored = defined $value->base
            ? { 'content' => $value->body, 'xml:base' => $value->base }
            : $value->body;
    }
    $item->{content}{encoded} = $stored;
}
256 | |
## Get or add entry categories.
##
## Setter: appends the given categories to the existing ones and stores
## a copy of the full list as both 'category' and dc 'subject'.
##
## Getter: reads 'category', falling back to dc 'subject'.  Returns every
## category in list context and the first one in scalar context.
sub category {
    my ($entry, @new) = @_;
    my $item = $entry->{entry};

    unless (@new) {
        my $stored = $item->{category} || $item->{dc}{subject};
        my @cats;
        if (ref($stored) eq 'ARRAY') {
            @cats = @$stored;
        }
        elsif (defined $stored) {
            @cats = ($stored);
        }
        return wantarray ? @cats : $cats[0];
    }

    my @all = ($entry->category, @new);
    $item->{category}    = [@all];
    $item->{dc}{subject} = [@all];
}
270 | |
## Get or set the entry author.  The setter writes both 'author' and dc
## 'creator'; the getter prefers 'author' and falls back to dc 'creator'.
sub author {
    my ($entry, @new) = @_;
    my $item = $entry->{entry};

    return $item->{author} || $item->{dc}{creator} unless @new;
    return $item->{author} = $item->{dc}{creator} = $new[0];
}
279 | |
280 | ## XML::RSS doesn't give us access to the rdf:about for the <item>, |
281 | ## so we have to fall back to the <link> element in RSS 1.0 feeds. |
## Get or set the entry id.  The setter stores the value as 'guid'; the
## getter returns 'guid', falling back to 'link'.
sub id {
    my ($entry, @new) = @_;
    my $item = $entry->{entry};

    return $item->{guid} || $item->{link} unless @new;
    return $item->{guid} = $new[0];
}
290 | |
## Get or set the entry's issue date.
##
## Setter: takes a DateTime object and stores it twice, as dc 'date'
## (W3CDTF format) and as 'pubDate' (mail format).
##
## Getter: returns the parsed date, or undef when no date is present or
## none can be parsed.  'pubDate' is tried first with the mail parser in
## loose mode.  If it is missing or unparseable, dc 'date' and then
## dcterms 'date' are tried as W3CDTF.  Each parse runs in its own eval,
## since either parser may die on invalid input, so that a malformed
## pubDate no longer hides a valid dc/dcterms date.
sub issued {
    my $item = shift->{entry};
    if (@_) {
        $item->{dc}{date} = DateTime::Format::W3CDTF->format_datetime($_[0]);
        return $item->{pubDate} =
            DateTime::Format::Mail->format_datetime($_[0]);
    }

    my $date;
    if (my $ts = $item->{pubDate}) {
        $date = eval {
            my $parser = DateTime::Format::Mail->new;
            $parser->loose;
            $parser->parse_datetime($ts);
        };
    }
    unless ($date) {
        my $ts = $item->{dc}{date} || $item->{dcterms}{date};
        $date = eval { DateTime::Format::W3CDTF->parse_datetime($ts) } if $ts;
    }
    return $date;
}
311 | |
## Get or set the entry's modification date.
##
## Setter: takes a DateTime object and stores it, in W3CDTF format, as
## dcterms 'modified'.
##
## Getter: reads dcterms 'modified', falling back to the Atom 'updated'
## field.  The timestamp is parsed as W3CDTF and, failing that, with
## XML::Atom::Util::iso2dt.  A false value comes back when there is no
## timestamp or neither parser accepts it.
sub modified {
    my $item = shift->{entry};

    unless (@_) {
        my $ts = $item->{dcterms}{modified}
              || $item->{'http://www.w3.org/2005/Atom'}{updated};
        return $ts unless $ts;
        return eval { DateTime::Format::W3CDTF->parse_datetime($ts) }
            || eval { XML::Atom::Util::iso2dt($ts) };
    }

    $item->{dcterms}{modified} =
        DateTime::Format::W3CDTF->format_datetime($_[0]);
}
323 | |
9a36f82c |
## Get or set the entry's latitude, kept under the item's geo 'lat' key.
sub lat {
    my ($entry, @new) = @_;
    my $item = $entry->{entry};

    return $item->{geo}{lat} unless @new;
    return $item->{geo}{lat} = $new[0];
}
332 | |
## Get or set the entry's longitude, kept under the item's geo 'long' key.
sub long {
    my ($entry, @new) = @_;
    my $item = $entry->{entry};

    return $item->{geo}{long} unless @new;
    return $item->{geo}{long} = $new[0];
}
341 | |
12a4079f |
## Get or set the entry's enclosure(s).
##
## Setter: copies url, type and length from the given enclosure into a
## plain hash.  With $XML::Feed::MULTIPLE_ENCLOSURES set, the hash is
## appended to the item's enclosure list; a single enclosure hash that is
## already stored is first promoted to a one-element list, so the push
## cannot die on a non-array value.  Otherwise the hash replaces whatever
## was stored.
##
## Getter: returns nothing (empty list / undef) when the item has no
## enclosure, instead of building an XML::Feed::Enclosure out of undef.
## Otherwise each stored enclosure is wrapped in XML::Feed::Enclosure;
## all of them are returned when $XML::Feed::MULTIPLE_ENCLOSURES is set,
## and only the last one when it is not.
sub enclosure {
    my $entry = shift;

    if (@_) {
        my $enclosure = shift;
        my $val = {
            url    => $enclosure->{url},
            type   => $enclosure->{type},
            length => $enclosure->{length}
        };
        if ($XML::Feed::MULTIPLE_ENCLOSURES) {
            my $existing = $entry->{entry}->{enclosure};
            if (defined $existing && ref $existing ne 'ARRAY') {
                $entry->{entry}->{enclosure} = [ $existing ];
            }
            push @{$entry->{entry}->{enclosure}}, $val;
        } else {
            $entry->{entry}->{enclosure} = $val;
        }
    } else {
        my $tmp = $entry->{entry}->{enclosure};
        return unless defined $tmp;
        my @encs = map { XML::Feed::Enclosure->new($_) }
            (ref $tmp eq 'ARRAY') ? @$tmp : ($tmp);
        return ($XML::Feed::MULTIPLE_ENCLOSURES) ? @encs : $encs[-1];
    }
}
9a36f82c |
363 | |
0d5e38d1 |
364 | 1; |