+# $Id: Changes,v 1.4 2004/07/29 16:42:29 btrott Exp $
+
+Revision history for XML::Feed
+
+0.02
+ - Changed behavior of Entry->summary to prevent it from returning the
+ full contents of the entry. Now, in an RSS feed, summary only returns
+ a value if there is both a <description> element *and* one of the
+ other elements typically used for the full content.
+ - Changed content model for Entry->content and Entry->summary.
+ They now return an XML::Feed::Content object, which knows about both
+ the actual content and the MIME type of the content.
+ - Improved feed format detection by first tag in feed.
+
0.01 2004.06.01
- Initial distribution.
inc/Module/Install/WriteAll.pm
lib/XML/Feed.pm
lib/XML/Feed/Atom.pm
+lib/XML/Feed/Content.pm
lib/XML/Feed/Entry.pm
lib/XML/Feed/ErrorHandler.pm
lib/XML/Feed/RSS.pm
t/01-parse.t
t/samples/atom.xml
t/samples/rss10.xml
+t/samples/rss20-no-summary.xml
t/samples/rss20.xml
name: XML-Feed
-version: 0.01
+version: 0.02
abstract: XML Syndication Feed Support
author: Benjamin Trott <cpan@stupidfool.org>
license: perl
-# $Id: Feed.pm,v 1.6 2004/05/30 16:59:02 btrott Exp $
+# $Id: Feed.pm,v 1.8 2004/07/29 16:44:18 btrott Exp $
package XML::Feed;
use strict;
use HTML::Parser;
use vars qw( $VERSION );
-$VERSION = '0.01';
+$VERSION = '0.02';
use constant FEED_MIME_TYPES => [
'application/x.atom+xml',
## Auto-detect feed type based on first element. This is prone
## to breakage, but then again we don't want to parse the whole
## feed ourselves.
- my($tag) = $xml =~ /<([a-zA-Z]\S+)/s;
+ my $tag;
+ while ($xml =~ /<(\S+)/sg) {
+ (my $t = $1) =~ tr/a-zA-Z0-9:\-\?//cd;
+ $tag = $t, last unless substr($t, 0, 1) eq '?';
+ }
+ return $class->error("Cannot find first element") unless $tag;
$tag =~ s/^.*://;
if ($tag eq 'rss' || $tag eq 'RDF') {
require XML::Feed::RSS;
-# $Id: Atom.pm,v 1.1.1.1 2004/05/29 17:29:56 btrott Exp $
+# $Id: Atom.pm,v 1.2 2004/06/20 15:20:37 btrott Exp $
package XML::Feed::Atom;
use strict;
use base qw( XML::Feed::Entry );
use XML::Atom::Util qw( iso2dt );
+use XML::Feed::Content;
use List::Util qw( first );
+## title: returns whatever the wrapped entry object's title method returns.
sub title { $_[0]->{entry}->title }
my $l = first { $_->rel eq 'alternate' } $_[0]->{entry}->link;
$l ? $l->href : undef;
}
-sub summary { $_[0]->{entry}->summary }
-sub content { $_[0]->{entry}->content ? $_[0]->{entry}->content->body : undef }
+
+## summary: returns an XML::Feed::Content object whose body is whatever the
+## wrapped entry's summary method returns. The type is hard-coded to
+## 'text/html'; it is not read from the feed.
+sub summary {
+ XML::Feed::Content->wrap({ type => 'text/html',
+ body => $_[0]->{entry}->summary });
+}
+
+## content: returns an XML::Feed::Content object built from the wrapped
+## entry's content object. When the entry has no content (a false value),
+## both type and body are undef, but an object is still returned.
+sub content {
+ my $c = $_[0]->{entry}->content;
+ XML::Feed::Content->wrap({ type => $c ? $c->type : undef,
+ body => $c ? $c->body : undef });
+}
sub category {
my $ns = XML::Atom::Namespace->new(dc => 'http://purl.org/dc/elements/1.1/');
--- /dev/null
+# $Id: Content.pm,v 1.1 2004/06/20 15:20:38 btrott Exp $
+
+package XML::Feed::Content;
+use strict;
+
+use base qw( XML::Feed::ErrorHandler );
+
+## wrap: class-method constructor. Takes a hash reference (the callers
+## visible in this patch pass 'type' and 'body' keys) and blesses a shallow
+## copy of it, so the caller's own hash is left unblessed.
+sub wrap {
+ my $class = shift;
+ my($c) = @_;
+ bless { %$c }, $class;
+}
+
+## _var: shared getter/setter. With only a field name it returns the stored
+## value; with an extra argument it stores that argument first (even if it
+## is undef, since the check is on argument count) and then returns it.
+sub _var {
+ my $content = shift;
+ my $var = shift;
+ $content->{$var} = shift if @_;
+ $content->{$var};
+}
+
+## type / body: public accessors for the 'type' and 'body' fields; both
+## delegate to _var and so act as setters when given an argument.
+sub type { shift->_var('type', @_) }
+sub body { shift->_var('body', @_) }
+
+1;
+__END__
+
+=head1 NAME
+
+XML::Feed::Content - Wrapper for content objects
+
+=head1 SYNOPSIS
+
+ my $content = $entry->content;
+ print $content->body;
+
+=head1 DESCRIPTION
+
+I<XML::Feed::Content> represents a content object in an I<XML::Feed::Entry>
+entry in a syndication feed. This could be a I<E<lt>descriptionE<gt>>
+element in an RSS feed, a I<E<lt>contentE<gt>> element in an Atom feed,
+etc. In other words, any element where knowing both the actual data and the
+B<type> of data is useful.
+
+=head1 USAGE
+
+=head2 $content->body
+
+The actual data.
+
+=head2 $content->type
+
+The MIME type of the content in I<body>.
+
+This is really only useful in Atom feeds, because RSS feeds do not specify
+the type of content included in an entry. In RSS feeds, generally the MIME
+type defaults to I<text/html>.
+
+=head1 AUTHOR & COPYRIGHT
+
+Please see the I<XML::Feed> manpage for author, copyright, and license
+information.
+
+=cut
-# $Id: Entry.pm,v 1.1.1.1 2004/05/29 17:29:56 btrott Exp $
+# $Id: Entry.pm,v 1.3 2004/07/29 16:42:29 btrott Exp $
package XML::Feed::Entry;
use strict;
=head2 $entry->content
-The full entry body, or as much as is available in the feed.
+An I<XML::Feed::Content> object representing the full entry body, or as
+much as is available in the feed.
In RSS feeds, this method will look first for
I<http://purl.org/rss/1.0/modules/content/#encoded> and
=head2 $entry->summary
-A short summary of the entry. Possibly.
+An I<XML::Feed::Content> object representing a short summary of the entry.
+Possibly.
Since RSS feeds do not have the idea of a summary separate from the entry
-body, this may return the same value as the I<$entry-E<gt>content> method.
-But it won't always, even with RSS feeds. For example, a number of RSS feeds
-use an element like I<http://purl.org/rss/1.0/modules/content/#encoded>
-for the entry body and put an excerpt in the I<E<lt>descriptionE<gt>> element;
-in those cases, this method will return the excerpt.
+body, this may not always be what you want. If the entry contains both a
+I<E<lt>descriptionE<gt>> element B<and> another element typically used for
+the full content of the entry--either I<http://www.w3.org/1999/xhtml/body>
+or I<http://purl.org/rss/1.0/modules/content/#encoded>--we treat that as
+the summary. Otherwise, we assume that there isn't a summary, and return
+an I<XML::Feed::Content> object whose I<body> is undefined.
=head2 $entry->category
-# $Id: RSS.pm,v 1.3 2004/05/30 09:39:52 btrott Exp $
+# $Id: RSS.pm,v 1.5 2004/07/29 16:42:29 btrott Exp $
package XML::Feed::RSS;
use strict;
package XML::Feed::RSS::Entry;
use strict;
+use XML::Feed::Content;
+
use base qw( XML::Feed::Entry );
+## title / link: return the 'title' and 'link' values stored directly in
+## the wrapped entry hash.
sub title { $_[0]->{entry}{title} }
sub link { $_[0]->{entry}{link} }
-sub summary { $_[0]->{entry}{description} }
+
+## summary: returns an XML::Feed::Content object for the item's summary.
+## The body is the item's <description> only when the item also carries one
+## of the full-content elements (content:encoded or xhtml:body); otherwise
+## the body is undef. The type is 'text/html', the same type content()
+## reports for <description> and the documented default for RSS feeds.
+sub summary {
+ my $item = $_[0]->{entry};
+ ## Some RSS feeds use <description> for a summary, and some use it
+ ## for the full content. Pretty gross. We don't want to return the
+ ## full content if the caller expects a summary, so the heuristic is:
+ ## if the <item> contains both a <description> and one of the elements
+ ## typically used for the full content, use <description> as the summary.
+ my $txt;
+ if ($item->{description} &&
+ ($item->{'http://purl.org/rss/1.0/modules/content/'}{encoded} ||
+ $item->{'http://www.w3.org/1999/xhtml'}{body})) {
+ $txt = $item->{description};
+ }
+ ## RSS does not declare a content type; report text/html so summary and
+ ## content agree on how <description> markup should be interpreted.
+ XML::Feed::Content->wrap({ type => 'text/html', body => $txt });
+}
+## content: returns an XML::Feed::Content object of type 'text/html' whose
+## body is the first true value among content:encoded, xhtml:body and
+## <description>, checked in that order.
sub content {
my $item = $_[0]->{entry};
- $_[0]->{entry}{'http://purl.org/rss/1.0/modules/content/'}{encoded} ||
- $_[0]->{entry}{'http://www.w3.org/1999/xhtml'}{body} ||
- $_[0]->{entry}{description};
+ my $body =
+ $_[0]->{entry}{'http://purl.org/rss/1.0/modules/content/'}{encoded} ||
+ $_[0]->{entry}{'http://www.w3.org/1999/xhtml'}{body} ||
+ $_[0]->{entry}{description};
+ XML::Feed::Content->wrap({ type => 'text/html', body => $body });
}
sub category {
-# $Id: 01-parse.t,v 1.2 2004/05/30 09:39:52 btrott Exp $
+# $Id: 01-parse.t,v 1.5 2004/07/29 16:43:33 btrott Exp $
use strict;
use Test;
use XML::Feed;
use URI;
-BEGIN { plan tests => 68 }
+BEGIN { plan tests => 70 }
my %Feeds = (
't/samples/atom.xml' => 'Atom',
ok(ref($dt), 'DateTime');
$dt->set_time_zone('UTC');
ok($dt->iso8601, '2004-05-30T07:39:25');
- ok($entry->content =~ /<p>Hello!<\/p>/);
- ok($entry->summary, 'Hello!...');
+ ok($entry->content->body =~ /<p>Hello!<\/p>/);
+ ok($entry->summary->body, 'Hello!...');
ok($entry->category, 'Travel');
ok($entry->author, 'Melody');
ok($entry->id);
}
+
+$feed = XML::Feed->parse('t/samples/rss20-no-summary.xml')
+ or die XML::Feed->errstr;
+my $entry = ($feed->entries)[0];
+ok(!$entry->summary->body);
+ok($entry->content->body =~ m!<p>This is a test.</p>!);
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<rss version="2.0" xmlns:xhtml="http://www.w3.org/1999/xhtml">
+<channel>
+<title>First Weblog</title>
+<link>http://localhost/weblog/</link>
+<description>This is a test weblog.</description>
+
+<item>
+<title>Test</title>
+<description>&lt;p&gt;This is a test.&lt;/p&gt;
+
+&lt;p&gt;Why don't you come down to our place for a coffee and a &lt;strong&gt;chat&lt;/strong&gt;?&lt;/p&gt;</description>
+<link>http://localhost/weblog/2004/05/test.html</link>
+</item>
+</channel>
+</rss>