9 use XML::RSS::Private::Output::Base;
10 use XML::RSS::Private::Output::V0_9;
11 use XML::RSS::Private::Output::V0_91;
12 use XML::RSS::Private::Output::V1_0;
13 use XML::RSS::Private::Output::V2_0;
15 use vars qw($VERSION $AUTOLOAD @ISA $AUTO_ADD);
50 lastBuildDate => undef,
52 managingEditor => undef,
65 skipDays => {day => undef,},
66 skipHours => {hour => undef,},
81 managingEditor => undef,
84 lastBuildDate => undef,
101 description => undef,
103 skipDays => {day => undef,},
104 skipHours => {hour => undef,},
107 description => undef,
115 description => undef,
125 description => undef,
133 # define required elements for RSS 0.9
137 "description" => [1, 500],
151 "description" => [1, 100],
157 # define required elements for RSS 0.91
161 "description" => [1, 500],
163 "language" => [1, 5],
164 "rating" => [0, 500],
165 "copyright" => [0, 100],
166 "pubDate" => [0, 100],
167 "lastBuildDate" => [0, 100],
169 "managingEditor" => [0, 100],
170 "webMaster" => [0, 100],
177 "height" => [0, 400],
178 "description" => [0, 500]
183 "description" => [0, 500]
187 "description" => [1, 500],
191 skipHours => {"hour" => [1, 23]},
192 skipDays => {"day" => [1, 10]}
195 # define required elements for RSS 2.0
199 "description" => [1, 500],
201 "language" => [0, 5],
202 "rating" => [0, 500],
203 "copyright" => [0, 100],
204 "pubDate" => [0, 100],
205 "lastBuildDate" => [0, 100],
207 "managingEditor" => [0, 100],
208 "webMaster" => [0, 100],
215 "height" => [0, 400],
216 "description" => [0, 500]
221 "description" => [0, 500]
225 "description" => [1, 500],
229 skipHours => {"hour" => [1, 23]},
230 skipDays => {"day" => [1, 10]}
233 my $namespace_map = {
234 rss10 => 'http://purl.org/rss/1.0/',
235 rss09 => 'http://my.netscape.com/rdf/simple/0.9/',
237 # rss091 => 'http://purl.org/rss/1.0/modules/rss091/',
238 rss20 => 'http://backend.userland.com/blogChannelModule',
241 sub _rdf_resource_fields {
243 'http://webns.net/mvcb/' => {
244 'generatorAgent' => 1,
245 'errorReportsTo' => 1
247 'http://purl.org/rss/1.0/modules/annotate/' => {'reference' => 1},
248 'http://my.theinfo.org/changed/1.0/rss/' => {'server' => 1}
252 my %empty_ok_elements = (enclosure => 1);
253 my %hashref_ok_elements = (description => 1);
255 sub _get_default_modules {
257 'http://purl.org/rss/1.0/modules/syndication/' => 'syn',
258 'http://purl.org/dc/elements/1.1/' => 'dc',
259 'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo',
260 'http://webns.net/mvcb/' => 'admin',
261 'http://purl.org/rss/1.0/modules/content/' => 'content',
265 sub _get_default_rss_2_0_modules {
266 return {'http://backend.userland.com/blogChannelModule' => 'blogChannel',};
269 sub _get_syn_ok_fields {
270 return [qw(updateBase updateFrequency updatePeriod)];
273 sub _get_dc_ok_fields {
300 $self->_initialize(@_);
305 sub _get_init_default_key_assignments {
307 {key => "version", default => '1.0',},
308 {key => "encode_output", default => 1,},
309 {key => "output", default => "",},
310 {key => "encoding", default => "UTF-8",},
311 {key => "encode_cb", default => undef(),},
312 {key => "xml:base", default => undef(),},
316 # This method resets the contents of the instance to an empty one (with no
317 # items, empty keys, etc.). Useful before parsing or during initialization.
323 $self->{_internal} = {};
325 # init num of items to 0
326 $self->{num_items} = 0;
331 delete $self->{_allow_multiple};
333 my $ok_fields = $self->_get_ok_fields();
336 exists($ok_fields->{$self->{version}})
337 ? $ok_fields->{$self->{version}}
338 : $ok_fields->{default};
340 while (my ($k, $v) = each(%$ver_ok_fields)) {
341 $self->{$k} = +{%{$v}};
351 # adhere to Netscape limits; no by default
352 $self->{'strict'} = 0;
355 $self->{namespaces} = {};
356 $self->{rss_namespace} = '';
357 foreach my $k (@{$self->_get_init_default_key_assignments()})
360 $self->{$key} = exists($hash{$key}) ? $hash{$key} : $k->{default};
365 ($self->{version} eq "2.0")
366 ? $self->_get_default_rss_2_0_modules()
367 : $self->_get_default_modules()
371 if (exists($hash{stylesheet})) {
372 $self->{stylesheet} = $hash{stylesheet};
375 if ($self->{version} eq "2.0") {
376 $self->{namespaces}->{'blogChannel'} = "http://backend.userland.com/blogChannelModule";
388 $hash->{prefix} =~ /^[a-z_][a-z0-9.\-_]*$/i
389 or croak "a namespace prefix should look like [A-Za-z_][A-Za-z0-9.\\-_]*";
392 or croak "a URI must be provided in a namespace declaration";
394 $self->{modules}->{$hash->{uri}} = $hash->{prefix};
401 # strict Netscape Netcenter length checks
402 if ($self->{'strict'}) {
404 # make sure we have a title and link
405 croak "title and link elements are required"
406 unless ($hash->{title} && $hash->{'link'});
408 # check string lengths
409 croak "title cannot exceed 100 characters in length"
410 if (length($hash->{title}) > 100);
411 croak "link cannot exceed 500 characters in length"
412 if (length($hash->{'link'}) > 500);
413 croak "description cannot exceed 500 characters in length"
414 if (exists($hash->{description})
415 && length($hash->{description}) > 500);
417 # make sure there aren't already 15 items
418 croak "total items cannot exceed 15 " if (@{$self->{items}} >= 15);
421 # add the item to the list
422 if (defined($hash->{mode}) && $hash->{mode} eq 'insert') {
423 unshift(@{$self->{items}}, $hash);
426 push(@{$self->{items}}, $hash);
429 # return reference to the list of items
430 return $self->{items};
434 # $self->_render_complete_rss_output($xml_version)
436 # This function is the workhorse of the XML output and does all the work of
437 # rendering the RSS, delegating the work to specialised functions.
439 # It accepts the requested version number as its argument.
441 sub _get_rendering_class {
442 my ($self, $ver) = @_;
446 return "XML::RSS::Private::Output::V1_0";
448 elsif ($ver eq "0.9")
450 return "XML::RSS::Private::Output::V0_9";
452 elsif ($ver eq "0.91")
454 return "XML::RSS::Private::Output::V0_91";
458 return "XML::RSS::Private::Output::V2_0";
462 sub _get_encode_cb_params
467 defined($self->{encode_cb}) ?
468 ("encode_cb" => $self->{encode_cb}) :
473 sub _get_rendering_obj {
474 my ($self, $ver) = @_;
476 return $self->_get_rendering_class($ver)->new(
480 $self->_get_encode_cb_params(),
485 sub _render_complete_rss_output {
486 my ($self, $ver) = @_;
488 return $self->_get_rendering_obj($ver)->_render_complete_rss_output();
492 return shift->_render_complete_rss_output("0.9");
496 return shift->_render_complete_rss_output("0.91");
500 return shift->_render_complete_rss_output("1.0");
504 return shift->_render_complete_rss_output("2.0");
509 sub _get_output_methods_map {
511 '0.9' => "as_rss_0_9",
512 '0.91' => "as_rss_0_9_1",
513 '2.0' => "as_rss_2_0",
514 '1.0' => "as_rss_1_0",
518 sub _get_default_output_method {
522 sub _get_output_method {
523 my ($self, $version) = @_;
525 if (my $output_method = $self->_get_output_methods_map()->{$version}) {
526 return $output_method;
529 return $self->_get_default_output_method();
533 sub _get_output_version {
535 return ($self->{output} =~ /\d/) ? $self->{output} : $self->{version};
538 # This is done to preserve backwards compatibility with older versions
539 # of XML-RSS that had the channel/{link,description,title} as the empty
543 my $callback = shift;
545 local $self->{channel}->{'link'} = $self->{channel}->{'link'};
546 local $self->{channel}->{'description'} = $self->{channel}->{'description'};
547 local $self->{channel}->{'title'} = $self->{channel}->{'title'};
549 foreach my $field (qw(link description title))
551 if (!defined($self->{channel}->{$field}))
553 $self->{channel}->{$field} = '';
557 return $callback->();
563 my $version = $self->_get_output_version();
565 my $output_method = $self->_get_output_method($version);
567 return $self->_output_env(
568 sub { return $self->$output_method(); }
572 # Checks if inside a possibly namespaced element
573 # TODO : After increasing test coverage convert all such conditionals to this
576 my ($self, $elem) = @_;
578 my $parser = $self->_parser;
580 return $parser->within_element($elem)
581 || $parser->within_element(
582 $parser->generate_ns_name($elem, $self->{rss_namespace})
586 sub _get_elem_namespace_helper {
587 my ($self, $el) = @_;
589 my $ns = $self->_parser->namespace($el);
591 return (defined($ns) ? $ns : "");
594 sub _get_elem_namespace {
599 my $ns = $self->_get_elem_namespace_helper(@_);
601 my $verdict = (!$ns && !$self->{rss_namespace})
602 || ($ns eq $self->{rss_namespace});
604 return ($ns, $verdict);
607 sub _current_element {
610 return $self->_parser->current_element;
613 sub _get_current_namespace {
616 return $self->_get_elem_namespace($self->_current_element);
619 sub _is_rdf_resource {
626 $ns = $self->_parser->namespace($el);
630 exists($self->_rdf_resource_fields->{ $ns })
631 && exists($self->_rdf_resource_fields->{ $ns }{ $el })
635 sub _get_ns_arrayity {
636 my ($self, $ns) = @_;
639 $self->_parse_options()->{'modules_as_arrays'}
640 && (!exists($self->_get_default_modules()->{$ns}))
642 && ($ns ne "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
645 my $default_ref = sub { $is_array ? [] : {} };
647 return ($is_array, $default_ref);
650 sub _append_text_to_elem_struct {
651 my ($self, $struct, $cdata, $mapping_sub, $is_array_sub) = @_;
653 my $elem = $self->_current_element;
655 my ($ns, $verdict) = $self->_get_current_namespace;
657 # If it's in the default namespace
659 $self->_append_struct(
661 scalar($mapping_sub->($struct, $elem)),
662 scalar($is_array_sub->($struct, $elem)),
667 my $prefix = $self->{modules}->{$ns};
669 my ($is_array, $default_ref) = $self->_get_ns_arrayity($ns);
671 $self->_append_struct(
672 ($struct->{$ns} ||= $default_ref->()),
674 (defined($prefix) && $prefix eq "dc"),
678 # If it's in a module namespace, provide a friendlier prefix duplicate
680 $self->_append_struct(
681 ($struct->{$prefix} ||= $default_ref->()),
693 my ($self, $struct, $key, $can_be_array, $cdata) = @_;
695 if (ref($struct) eq 'ARRAY') {
696 $struct->[-1]->{'val'} .= $cdata;
699 elsif (defined $struct->{$key}) {
700 if (ref($struct->{$key}) eq 'HASH') {
701 $struct->{$key}->{content} .= $cdata;
704 elsif ($can_be_array && ref($struct->{$key}) eq 'ARRAY') {
705 $struct->{$key}->[-1] .= $cdata;
710 $struct->{$key} .= $cdata;
715 my ($struct, $elem) = @_;
719 sub _return_elem_is_array {
720 my ($struct, $elem) = @_;
722 # Always return false because no element should be an array.
726 sub _append_text_to_elem {
727 my ($self, $ext_tag, $cdata) = @_;
729 return $self->_append_text_to_elem_struct(
733 \&_return_elem_is_array,
740 my $parser = $self->_parser;
742 return $parser->within_element(
743 $parser->generate_ns_name(
744 "topics", 'http://purl.org/rss/1.0/modules/taxonomy/'
749 sub _return_item_elem {
750 my ($item, $elem) = @_;
751 if ($elem eq "guid") {
752 return $item->{isPermaLink} ? "permaLink" : "guid";
759 sub _return_item_elem_is_array {
760 my ($item, $elem) = @_;
762 return ($elem eq "category");
765 sub _append_text_to_item {
766 my ($self, $cdata) = @_;
768 if (@{$self->{'items'}} < $self->{num_items}) {
769 push @{$self->{items}}, {};
772 $self->_append_text_to_elem_struct(
776 \&_return_item_elem_is_array
780 sub _append_to_array_elem {
781 my ($self, $category, $cdata) = @_;
783 if (! $self->_my_in_element($category))
788 my $el = $self->_current_element;
790 if (ref($self->{$category}->{$el}) eq "ARRAY") {
791 $self->{$category}->{$el}->[-1] .= $cdata;
794 $self->{$category}->{$el} .= $cdata;
801 my ($self, $cdata) = (@_);
804 if ($self->_my_in_element("image")) {
805 $self->_append_text_to_elem("image", $cdata);
808 elsif (defined($self->{_inside_item_elem})) {
809 return if $self->_within_topics;
811 $self->_append_text_to_item($cdata);
815 $self->_my_in_element("textinput") || $self->_my_in_element("textInput")
818 $self->_append_text_to_elem("textinput", $cdata);
821 elsif ($self->_append_to_array_elem("skipHours", $cdata)) {
822 # Do nothing - already done in the predicate.
824 elsif ($self->_append_to_array_elem("skipDays", $cdata)) {
825 # Do nothing - already done in the predicate.
828 elsif ($self->_my_in_element("channel")) {
829 if ($self->_within_topics() || $self->_my_in_element("items")) {
833 if ($self->_current_element eq "category") {
834 $self->_append_to_array_elem("channel", $cdata);
837 $self->_append_text_to_elem("channel", $cdata);
843 my ($self, $version, $encoding, $standalone) = (@_);
844 $self->{encoding} = $encoding;
846 #print "ENCODING: $encoding\n";
849 sub _should_be_hashref {
850 my ($self, $el) = @_;
854 $empty_ok_elements{$el}
855 || ($self->_parse_options()->{'hashrefs_instead_of_strings'}
856 && $hashref_ok_elements{$el}
861 sub _start_array_element_in_struct {
862 my ($self, $input_struct, $el, $prefix) = @_;
864 my ($el_ns, $el_verdict) = $self->_get_elem_namespace($el);
866 my ($is_array, $default_ref) = $self->_get_ns_arrayity($el_ns);
868 my @structs = (!$el_verdict)
870 (exists($self->{modules}->{$el_ns})
871 ? ($input_struct->{$self->{modules}->{$el_ns}} ||= $default_ref->())
874 ($input_struct->{$el_ns} ||= $default_ref->()),
879 foreach my $struct (@structs)
881 if (ref($struct) eq 'ARRAY') {
882 push @$struct, { el => $el, val => "", };
884 # If it's an array - append a new empty element because a new one
886 elsif (ref($struct->{$el}) eq "ARRAY") {
887 push @{$struct->{$el}}, "";
889 # If it's not an array but still full (i.e: it's only the second
890 # element), then turn it into an array
891 elsif (defined($struct->{$el}) && length($struct->{$el})) {
892 $struct->{$el} = [$struct->{$el}, ""];
894 # Else - do nothing and let the function append to the new value
900 sub _start_array_element {
901 my ($self, $cat, $el) = @_;
903 if (!$self->_my_in_element($cat)) {
907 $self->_start_array_element_in_struct($self->{$cat}, $el);
914 return ($self->{'items'}->[$self->{num_items} - 1] ||= {});
922 my $parser = $self->_parser;
924 my ($el_ns, $el_verdict) = $self->_get_elem_namespace($el);
928 if (exists($attribs{'resource'}))
930 $self->image("rdf:resource", $attribs{'resource'});
934 # beginning of RSS 0.91
936 if (exists($attribs{version})) {
937 $self->{_internal}->{version} = $attribs{version};
940 croak "Malformed RSS: invalid version\n";
944 $self->{'xml:base'} = $attribs{'base'} if exists $attribs{'base'};
946 # beginning of RSS 1.0 or RSS 0.9
948 elsif ($el eq 'RDF') {
949 my @prefixes = $parser->new_ns_prefixes;
950 foreach my $prefix (@prefixes) {
951 my $uri = $parser->expand_ns_prefix($prefix);
952 $self->{namespaces}->{$prefix} = $uri;
954 #print "$prefix = $uri\n";
957 # removed assumption that RSS is the default namespace - kellan, 11/5/02
959 foreach my $uri (values %{$self->{namespaces}}) {
960 if ($namespace_map->{'rss10'} eq $uri) {
961 $self->{_internal}->{version} = '1.0';
962 $self->{rss_namespace} = $uri;
965 elsif ($namespace_map->{'rss09'} eq $uri) {
966 $self->{_internal}->{version} = '0.9';
967 $self->{rss_namespace} = $uri;
972 # failed to match a namespace
973 if (!defined($self->{_internal}->{version})) {
974 croak "Malformed RSS: invalid version\n";
977 #if ($self->expand_ns_prefix('#default') =~ /\/1.0\//) {
978 # $self->{_internal}->{version} = '1.0';
979 #} elsif ($self->expand_ns_prefix('#default') =~ /\/0.9\//) {
980 # $self->{_internal}->{version} = '0.9';
982 # croak "Malformed RSS: invalid version\n";
986 $self->{'xml:base'} = $attribs{'base'} if exists $attribs{'base'};
988 # beginning of item element
990 elsif ($self->_start_array_element("skipHours", $el)) {
991 # Do nothing - already done in the predicate.
993 elsif ($self->_start_array_element("skipDays", $el)) {
994 # Do nothing - already done in the predicate.
996 elsif ($el eq 'item') {
998 # deal with trouble makers who use mod_content :)
1000 my ($ns, $verdict) = $self->_get_elem_namespace($el);
1004 # Sanity check to make sure we don't have nested elements that
1005 # can confuse the parser.
1006 if (!defined($self->{_inside_item_elem})) {
1008 # increment item count
1009 $self->{num_items}++;
1010 $self->{_inside_item_elem} = $parser->depth();
1014 $self->_last_item->{'xml:base'} = $attribs{'base'} if exists $attribs{'base'};
1017 # guid element is a permanent link unless isPermaLink attribute is set to false
1019 elsif ($el eq 'guid') {
1020 $self->_last_item->{'isPermaLink'} =
1021 (exists($attribs{'isPermaLink'}) &&
1022 (lc($attribs{'isPermaLink'}) eq 'true')
1025 # beginning of taxo li element in item element
1026 #'http://purl.org/rss/1.0/modules/taxonomy/' => 'taxo'
1029 $self->_current_element eq "item"
1030 && (($el eq "category") ||
1032 exists($self->{modules}->{$el_ns})
1033 && ($self->{modules}->{$el_ns} eq "dc")
1037 $self->_start_array_element_in_struct($self->_last_item, $el);
1040 $parser->within_element(
1041 $parser->generate_ns_name("topics", 'http://purl.org/rss/1.0/modules/taxonomy/')
1043 && $parser->within_element($parser->generate_ns_name("item", $namespace_map->{'rss10'}))
1044 && $self->_current_element eq 'Bag'
1049 #print "taxo: ", $attribs{'resource'},"\n";
1050 push(@{$self->_last_item->{'taxo'}}, $attribs{'resource'});
1051 $self->{'modules'}->{'http://purl.org/rss/1.0/modules/taxonomy/'} = 'taxo';
1053 # beginning of taxo li in channel element
1056 $parser->within_element(
1057 $parser->generate_ns_name("topics", 'http://purl.org/rss/1.0/modules/taxonomy/')
1059 && $parser->within_element($parser->generate_ns_name("channel", $namespace_map->{'rss10'}))
1060 && $self->_current_element eq 'Bag'
1064 push(@{$self->{'channel'}->{'taxo'}}, $attribs{'resource'});
1065 $self->{'modules'}->{'http://purl.org/rss/1.0/modules/taxonomy/'} = 'taxo';
1068 # beginning of a channel element that stores its info in rdf:resource
1069 elsif ( $parser->namespace($el)
1070 && $self->_is_rdf_resource($el)
1071 && $self->_current_element eq 'channel')
1073 my $ns = $parser->namespace($el);
1075 # Commented out by shlomif - the RSS namespaces are not present
1076 # in the 'rdf_resource_fields' so this condition always evaluates
1078 # if ( $ns eq $self->{rss_namespace} ) {
1079 # $self->{channel}->{$el} = $attribs{resource};
1084 $self->{channel}->{$ns}->{$el} = $attribs{resource};
1088 if (exists($self->{modules}->{$ns})) {
1089 $ns = $self->{modules}->{$ns};
1090 $self->{channel}->{$ns}->{$el} = $attribs{resource};
1094 # beginning of an item element that stores its info in rdf:resource
1095 elsif ( $parser->namespace($el)
1096 && $self->_is_rdf_resource($el)
1097 && $self->_current_element eq 'item')
1099 my $ns = $parser->namespace($el);
1101 # Commented out by shlomif - the RSS namespaces are not present
1102 # in the 'rdf_resource_fields' so this condition always evaluates
1104 # if ( $ns eq $self->{rss_namespace} ) {
1105 # $self->_last_item->{ $el } = $attribs{resource};
1109 $self->_last_item->{$ns}->{$el} = $attribs{resource};
1113 if (exists($self->{modules}->{$ns})) {
1114 $ns = $self->{modules}->{$ns};
1115 $self->_last_item->{$ns}->{$el} = $attribs{resource};
1119 elsif ($self->_should_be_hashref($el) and $self->_current_element eq 'item') {
1120 if (defined $attribs{base}) {
1121 $attribs{'xml:base'} = delete $attribs{base};
1123 if (keys(%attribs)) {
1125 $self->_last_item->{$el} =
1126 $self->_make_array($el, $self->_last_item->{$el}, \%attribs);
1129 $self->_last_item->{$el_ns}->{$el} =
1130 $self->_make_array($el, $self->_last_item->{$el_ns}->{$el}, \%attribs);
1132 my $prefix = $self->{modules}->{$el_ns};
1135 $self->_last_item->{$prefix}->{$el} =
1136 $self->_make_array($el, $self->_last_item->{$prefix}->{$el}, \%attribs);
1141 elsif ($self->_start_array_element("image", $el)) {
1142 # Do nothing - already done in the predicate.
1144 elsif (($el eq "category") &&
1145 (!$parser->within_element("item")) &&
1146 $self->_start_array_element("channel", $el)) {
1147 # Do nothing - already done in the predicate.
1149 elsif (($self->_current_element eq 'channel') &&
1152 # Make sure an opening tag signifies that the element has been
1154 if ( exists($self->{'channel'}->{$el})
1155 && (!defined($self->{'channel'}->{$el})))
1157 $self->{'channel'}->{$el} = "";
1168 if (!$self->_allow_multiple($el)) {
1172 if (!defined $old) {
1174 } elsif (ref($old) ne 'ARRAY') {
1181 sub _allow_multiple {
1185 $self->{_allow_multiple} ||=
1188 @{$self->_parse_options->{allow_multiple} || []}
1191 return $self->{_allow_multiple}->{$el};
1195 my ($self, $el) = @_;
1197 if (defined($self->{_inside_item_elem})
1198 && $self->{_inside_item_elem} == $self->_parser->depth())
1200 delete($self->{_inside_item_elem});
1204 sub _auto_add_modules {
1207 for my $ns (keys %{$self->{namespaces}}) {
1209 # skip default namespaces
1212 || $ns eq "#default"
1213 || exists $self->{modules}{$self->{namespaces}{$ns}};
1214 $self->add_module(prefix => $ns, uri => $self->{namespaces}{$ns});
1224 $self->{_parser} = shift;
1226 return $self->{_parser};
1232 return XML::Parser->new(
1238 my ($parser, $cdata) = @_;
1239 $self->_parser($parser);
1240 $self->_handle_char($cdata);
1241 # Detach the parser to avoid reference loops.
1242 $self->_parser(undef);
1246 $self->_parser($parser);
1247 $self->_handle_dec(@_);
1248 # Detach the parser to avoid reference loops.
1249 $self->_parser(undef);
1253 $self->_parser($parser);
1254 $self->_handle_start(@_);
1255 # Detach the parser to avoid reference loops.
1256 $self->_parser(undef);
1260 $self->_parser($parser);
1261 $self->_handle_end(@_);
1262 # Detach the parser to avoid reference loops.
1263 $self->_parser(undef);
1269 sub _parse_options {
1273 $self->{_parse_options} = shift;
1276 return $self->{_parse_options};
1281 sub _generic_parse {
1285 my $options = shift;
1289 $self->_parse_options($options || {});
1291 # Workaround to make sure that if we were defined with version => "2.0"
1292 # then we can still parse 1.0 and 0.9.x feeds correctly.
1293 if ($self->{version} eq "2.0") {
1294 $self->{modules} = +{%{$self->_get_default_modules()}, %{$self->{modules}}};
1298 my $parser = $self->_get_parser();
1301 $parser->$method($arg);
1308 # Cleanup so perl-5.6.2 will be happy.
1309 $parser->setHandlers(
1310 map { ($_ => \&_empty) } (qw(Char XMLDecl Start End))
1320 $self->_auto_add_modules if $AUTO_ADD;
1321 $self->{version} = $self->{_internal}->{version};
1328 my $text_to_parse = shift;
1329 my $options = shift;
1331 return $self->_generic_parse("parse", $text_to_parse, $options);
1336 my $file_to_parse = shift;
1337 my $options = shift;
1339 return $self->_generic_parse("parsefile", $file_to_parse, $options);
1342 sub _get_save_output_mode {
1345 return (">:encoding(" . $self->_encoding() . ")");
1349 my ($self, $file) = @_;
1353 open(OUT, $self->_get_save_output_mode(), "$file")
1354 or croak "Cannot open file $file for write: $!";
1355 print OUT $self->as_string;
1360 my ($self, $value) = @_;
1361 $self->{'strict'} = $value;
1364 sub _handle_accessor {
1368 my $type = ref($self);
1370 croak "Unregistered entity: Can't access $name field in object of class $type"
1371 unless (exists $self->{$name});
1373 # return reference to RSS structure
1375 return $self->{$name}->{$_[0]};
1377 # we're going to set values here
1383 # make sure we have required elements and correct lengths
1384 if ($self->{'strict'}) {
1385 ($self->{version} eq '0.9')
1386 ? ($_REQ = $_REQ_v0_9)
1387 : ($_REQ = $_REQ_v0_9_1);
1390 # store data in object
1391 foreach my $key (keys(%hash)) {
1392 if ($self->{'strict'}) {
1393 my $req_element = $_REQ->{$name}->{$key};
1394 confess "$key cannot exceed " . $req_element->[1] . " characters in length"
1395 if defined $req_element->[1] && length($hash{$key}) > $req_element->[1];
1397 $self->{$name}->{$key} = $hash{$key};
1401 return $self->{$name};
1403 # otherwise, just return a reference to the whole thing
1406 return $self->{$name};
1409 # make sure we have all required elements
1410 #foreach my $key (keys(%{$_REQ->{$name}})) {
1411 #my $element = $_REQ->{$name}->{$key};
1412 #croak "$key is required in $name"
1413 #if ($element->[0] == 1) && (!defined($hash{$key}));
1414 #croak "$key cannot exceed ".$element->[1]." characters in length"
1415 #unless length($hash{$key}) <= $element->[1];
1421 return $self->_handle_accessor("modules", @_);;
1427 return $self->_handle_accessor("channel", @_);
1433 return $self->_handle_accessor("image", @_);
1439 return $self->_handle_accessor("textinput", @_);
1445 return $self->_handle_accessor("skipDays", @_);
1451 return $self->_handle_accessor("skipHours", @_);
1454 ### Read only, scalar accessors
1456 sub _encode_output {
1459 return $self->{'encode_output'};
1465 return $self->{'encoding'};
1471 return $self->{'stylesheet'};
1477 return $self->{items};
1485 XML::RSS - creates and updates RSS files
1489 # create an RSS 1.0 file (http://purl.org/rss/1.0/)
1491 my $rss = XML::RSS->new(version => '1.0');
1493 title => "freshmeat.net",
1494 link => "http://freshmeat.net",
1495 description => "the one-stop-shop for all your Linux software needs",
1497 date => '2000-08-23T07:00+00:00',
1498 subject => "Linux Software",
1499 creator => 'scoop@freshmeat.net',
1500 publisher => 'scoop@freshmeat.net',
1501 rights => 'Copyright 1999, Freshmeat.net',
1502 language => 'en-us',
1505 updatePeriod => "hourly",
1506 updateFrequency => "1",
1507 updateBase => "1901-01-01T00:00+00:00",
1510 'http://dmoz.org/Computers/Internet',
1511 'http://dmoz.org/Computers/PC'
1516 title => "freshmeat.net",
1517 url => "http://freshmeat.net/images/fm.mini.jpg",
1518 link => "http://freshmeat.net",
1520 creator => "G. Raphics (graphics at freshmeat.net)",
1525 title => "GTKeyboard 0.85",
1526 link => "http://freshmeat.net/news/1999/06/21/930003829.html",
1527 description => "GTKeyboard is a graphical keyboard that ...",
1529 subject => "X11/Utilities",
1530 creator => "David Allen (s2mdalle at titan.vcu.edu)",
1533 'http://dmoz.org/Computers/Internet',
1534 'http://dmoz.org/Computers/PC'
1539 title => "quick finder",
1540 description => "Use the text input below to search freshmeat",
1542 link => "http://core.freshmeat.net/search.php3",
1545 # Optionally mixing in elements of a non-standard module/namespace
1547 $rss->add_module(prefix=>'my', uri=>'http://purl.org/my/rss/module/');
1550 title => "xIrc 2.4pre2",
1551 link => "http://freshmeat.net/projects/xirc/",
1552 description => "xIrc is an X11-based IRC client which ...",
1555 category => "X11/IRC",
1559 $rss->add_item (title=>$title, link=>$link, slash=>{ topic=>$topic });
1561 # create an RSS 2.0 file
1563 my $rss = XML::RSS->new (version => '2.0');
1564 $rss->channel(title => 'freshmeat.net',
1565 link => 'http://freshmeat.net',
1567 description => 'the one-stop-shop for all your Linux software needs',
1568 rating => '(PICS-1.1 "http://www.classify.org/safesurf/" 1 r (SS~~000 1))',
1569 copyright => 'Copyright 1999, Freshmeat.net',
1570 pubDate => 'Thu, 23 Aug 1999 07:00:00 GMT',
1571 lastBuildDate => 'Thu, 23 Aug 1999 16:20:26 GMT',
1572 docs => 'http://www.blahblah.org/fm.cdf',
1573 managingEditor => 'scoop@freshmeat.net',
1574 webMaster => 'scoop@freshmeat.net'
1577 $rss->image(title => 'freshmeat.net',
1578 url => 'http://freshmeat.net/images/fm.mini.jpg',
1579 link => 'http://freshmeat.net',
1582 description => 'This is the Freshmeat image stupid'
1585 $rss->add_item(title => "GTKeyboard 0.85",
1586 # creates a guid field with permaLink=true
1587 permaLink => "http://freshmeat.net/news/1999/06/21/930003829.html",
1588 # alternately creates a guid field with permaLink=false
1589 # guid => "gtkeyboard-0.85"
1590 enclosure => { url=>$url, type=>"application/x-bittorrent" },
1591 description => 'blah blah'
1594 $rss->textinput(title => "quick finder",
1595 description => "Use the text input below to search freshmeat",
1597 link => "http://core.freshmeat.net/search.php3"
1600 # create an RSS 0.9 file
1602 my $rss = XML::RSS->new( version => '0.9' );
1603 $rss->channel(title => "freshmeat.net",
1604 link => "http://freshmeat.net",
1605 description => "the one-stop-shop for all your Linux software needs",
1608 $rss->image(title => "freshmeat.net",
1609 url => "http://freshmeat.net/images/fm.mini.jpg",
1610 link => "http://freshmeat.net"
1613 $rss->add_item(title => "GTKeyboard 0.85",
1614 link => "http://freshmeat.net/news/1999/06/21/930003829.html"
1617 $rss->textinput(title => "quick finder",
1618 description => "Use the text input below to search freshmeat",
1620 link => "http://core.freshmeat.net/search.php3"
1623 # print the RSS as a string
1624 print $rss->as_string;
1626 # or save it to a file
1627 $rss->save("fm.rdf");
1629 # insert an item into an RSS file and remove the oldest ones if
1630 # there are already 15 items or more
1631 my $rss = XML::RSS->new;
1632 $rss->parsefile("fm.rdf");
1634 while (@{$rss->{'items'}} >= 15)
1636 pop(@{$rss->{'items'}});
1639 $rss->add_item(title => "MpegTV Player (mtv) 1.0.9.7",
1640 link => "http://freshmeat.net/news/1999/06/21/930003958.html",
1644 # parse a string instead of a file
1645 $rss->parse($string);
1647 # print the title and link of each RSS item
1648 foreach my $item (@{$rss->{'items'}}) {
1649 print "title: $item->{'title'}\n";
1650 print "link: $item->{'link'}\n\n";
1653 # output the RSS 0.9 or 0.91 file as RSS 1.0
1654 $rss->{output} = '1.0';
1655 print $rss->as_string;
1659 This module provides a basic framework for creating and maintaining
1660 RDF Site Summary (RSS) files. This distribution also contains many
1661 examples that allow you to generate HTML from an RSS file, convert between
1662 the 0.9, 0.91, and 1.0 versions, and do other nifty things.
1663 This might be helpful if you want to include news feeds on your Web
1664 site from sources like Slashdot and Freshmeat or if you want to syndicate
1667 XML::RSS currently supports the 0.9, 0.91, 1.0, and 2.0 versions of RSS.
1668 See http://backend.userland.com/rss091 for information on RSS 0.91.
1669 See http://www.purplepages.ie/RSS/netscape/rss0.90.html for RSS 0.9.
1670 See http://web.resource.org/rss/1.0/ for RSS 1.0.
1672 RSS was originally developed by Netscape as the format for
1673 Netscape Netcenter channels, however, many Web sites have since
1674 adopted it as a simple syndication format. With the advent of RSS 1.0,
1675 users are now able to syndicate many different kinds of content
1676 including news headlines, threaded messages, product catalogs, etc.
1678 B<Note:> In order to parse and generate dates (such as C<pubDate>
1679 and C<dc:date>) it is recommended to use L<DateTime::Format::Mail> and
1680 L<DateTime::Format::W3CDTF>, which is what L<XML::RSS> uses internally
1687 =item XML::RSS->new(version=>$version, encoding=>$encoding, output=>$output, stylesheet=>$stylesheet_url, 'xml:base'=>$base)
1689 Constructor for XML::RSS. It returns a reference to an XML::RSS object.
1690 You may also pass the RSS version and the XML encoding to use. The default
1691 B<version> is 1.0. The default B<encoding> is UTF-8. You may also specify
1692 the B<output> format regardless of the input version. This comes in handy
1693 when you want to convert RSS between versions. The XML::RSS modules
1694 will convert between any of the formats. If you set B<encode_output>, XML::RSS
1695 will make sure to encode any entities in generated RSS. This is now on by
1698 You can also pass an optional URL to an XSL stylesheet that can be used to
1699 output an C<<< <?xml-stylesheet ... ?> >>> processing instruction in the header that will
1700 allow some browsers to render the RSS file as HTML.
1702 You can also set C<encode_cb> to a reference to a subroutine that will
1703 encode the output in a custom way. This subroutine accepts two parameters:
1704 a reference to the C<XML::RSS::Private::Output::Base>-derived object (which
1705 should normally not concern you) and the text to encode. It should return
1706 the encoded text. If not set, then the module will encode using its
1707 custom encoding routine.
1709 xml:base will set an C<xml:base> property as per
1711 http://www.w3.org/TR/xmlbase/
1713 Note that in order to encode properly, you need to handle "CDATA" sections
1714 properly. Look at L<XML::RSS::Private::Output::Base>'s C<_default_encode()>
1715 method for how to do it properly.
1717 =item add_item (title=>$title, link=>$link, description=>$desc, mode=>$mode)
1719 Adds an item to the XML::RSS object. B<mode> and B<description> are optional.
1721 is append, which adds the item to the end of the list. To insert an item, set the mode
1724 The items are stored in the array @{$obj->{'items'}} where
1725 B<$obj> is a reference to an XML::RSS object.
1729 Returns a string containing the RSS for the XML::RSS object. This
1730 method will also encode special characters along the way.
1732 =item channel (title=>$title, link=>$link, description=>$desc, language=>$language, rating=>$rating, copyright=>$copyright, pubDate=>$pubDate, lastBuildDate=>$lastBuild, docs=>$docs, managingEditor=>$editor, webMaster=>$webMaster)
1734 Channel information is required in RSS. The B<title> cannot
1735 be more than 40 characters, the B<link> 500, and the B<description>
1736 500 when outputting RSS 0.9. B<title>, B<link>, and B<description>
1737 are required for RSS 1.0. B<language> is required for RSS 0.91.
1738 The other parameters are optional for RSS 0.91 and 1.0.
1740 To retrieve the values of the channel, pass the name of the value
1741 (title, link, or description) as the first and only argument
1744 $title = $rss->channel('title');
1746 =item image (title=>$title, url=>$url, link=>$link, width=>$width, height=>$height, description=>$desc)
1748 Adding an image is not required. B<url> is the URL of the
1749 image, B<link> is the URL the image is linked to. B<title>, B<url>,
1750 and B<link> parameters are required if you are going to
1751 use an image in your RSS file. The remaining image elements are used
1752 in RSS 0.91 or optionally imported into RSS 1.0 via the rss091 namespace.
1754 The method for retrieving the values for the image is the same as it
1755 is for B<channel()>.
1757 =item parse ($string, \%options)
1759 Parses an RDF Site Summary which is passed into B<parse()> as the first
1760 parameter. Returns the instance of the object so one can say
1761 C<< $rss->parse($string)->other_method() >>.
1763 See the add_module() method for instructions on automatically adding
1764 modules as a string is parsed.
1766 %options is a list of options that specify how parsing is to be done. The
1767 available options are:
1771 =item * allow_multiple
1773 Takes an array ref of names which indicates which elements should
1774 be allowed to have multiple occurrences. So, for example, to parse
1775 feeds with multiple enclosures
1777 $rss->parse($xml, { allow_multiple => ['enclosure'] });
1779 =item * hashrefs_instead_of_strings
1781 If true, then some items (so far "C<description>") will become hash-references
1782 instead of strings (with a B<content> key containing their content), B<if>
1783 they have XML attributes. Without this key, the attributes will be ignored
1784 and there will only be a string. Thus, specifying this option may break
1787 =item * modules_as_arrays
1789 This option, when true, will parse the modules' key-value pairs as an arrayref of
1790 C<<< { el => $key_name, value => $value, } >>> hash-refs to gracefully
1791 handle duplicate items (see below). It will not affect the known modules such
1792 as dc ("Dublin Core").
1796 =item parsefile ($file, \%options)
1798 Same as B<parse()> except it parses a file rather than a string.
1800 See the add_module() method for instructions on automatically adding
1801 modules as a string is parsed.
1805 Saves the RSS to a specified file.
1807 =item skipDays (day => $day)
1809 Populates the skipDays element with the day $day.
1811 =item skipHours (hour => $hour)
1813 Populates the skipHours element with the hour $hour.
1815 =item strict ($boolean)
1817 If it's set to 1, it will adhere to the lengths as specified
1818 by Netscape Netcenter requirements. It's set to 0 by default.
1819 Use it if the RSS file you're generating is for Netcenter.
1820 strict will only work for RSS 0.9 and 0.91. Do not use it for
1823 =item textinput (title=>$title, description=>$desc, name=>$name, link=>$link);
1825 This RSS element is also optional. Using it allows users to submit a Query
1826 to a program on a Web server via an HTML form. B<name> is the HTML form name
1827 and B<link> is the URL to the program. Content is submitted using the GET
1830 Access to the B<textinput> values is the same as B<channel()> and
1833 =item add_module(prefix=>$prefix, uri=>$uri)
1835 Adds a module namespace declaration to the XML::RSS object, allowing you
1836 to add modularity outside of the standard RSS 1.0 modules. At present,
1837 the standard modules Dublin Core (dc) and Syndication (syn) are predefined
1838 for your convenience. The Taxonomy (taxo) module is also internally supported.
1840 The modules are stored in the hash %{$obj->{'modules'}} where
1841 B<$obj> is a reference to an XML::RSS object.
1843 If you want to automatically add modules that the parser finds in
1844 namespaces, set the $XML::RSS::AUTO_ADD variable to a true value. By
1845 default the value is false. (N.B. AUTO_ADD only updates the
1846 %{$obj->{'modules'}} hash. It does not provide the other benefits
1847 of using add_module.)
1851 =head2 RSS 1.0 MODULES
1853 XML-Namespace-based modularization affords RSS 1.0 compartmentalized
1854 extensibility. The only modules that ship "in the box" with RSS 1.0
1855 are Dublin Core (http://purl.org/rss/1.0/modules/dc/), Syndication
1856 (http://purl.org/rss/1.0/modules/syndication/), and Taxonomy
1857 (http://purl.org/rss/1.0/modules/taxonomy/). Consult the appropriate
1858 module's documentation for further information.
1860 Adding items from these modules in XML::RSS is as simple as adding other
1861 attributes such as title, link, and description. The only difference
1862 is the compartmentalization of their key/value pairs in a second-level
1865 $rss->add_item (title=>$title, link=>$link, dc=>{ subject=>$subject, creator=>$creator, date=>$date });
1867 For elements of the Dublin Core module, use the key 'dc'. For elements
1868 of the Syndication module, 'syn'. For elements of the Taxonomy module,
1869 'taxo'. These are the prefixes used in the RSS XML document itself.
1870 They are associated with appropriate URI-based namespaces:
1872 syn: http://purl.org/rss/1.0/modules/syndication/
1873 dc: http://purl.org/dc/elements/1.1/
1874 taxo: http://purl.org/rss/1.0/modules/taxonomy/
1876 The Dublin Core ('dc') hash keys may point to an array
1877 reference, which in turn will specify multiple such keys, and render them
1878 one after the other. For example:
1884 subject=> ["Jungle", "Desert", "Swamp"],
1890 Dublin Core elements may occur in channel, image, item(s), and textinput
1891 -- albeit uncommon to find them under image and textinput. Syndication
1892 elements are limited to the channel element. Taxonomy elements can occur
1893 in the channel or item elements.
1895 Access to module elements after parsing an RSS 1.0 document using
1896 XML::RSS is via either the prefix or namespace URI for your convenience.
1898 print $rss->{items}->[0]->{dc}->{subject};
1902 print $rss->{items}->[0]->{'http://purl.org/dc/elements/1.1/'}->{subject};
1904 XML::RSS also has support for "non-standard" RSS 1.0 modularization at
1905 the channel, image, item, and textinput levels. Parsing an RSS document
1906 grabs any elements of other namespaces which might appear. XML::RSS
1907 also allows the inclusion of arbitrary namespaces and associated elements
1908 when building RSS documents.
1910 For example, to add elements of a made-up "My" module, first declare the
1911 namespace by associating a prefix with a URI:
1913 $rss->add_module(prefix=>'my', uri=>'http://purl.org/my/rss/module/');
1915 Then proceed as usual:
1917 $rss->add_item (title=>$title, link=>$link, my=>{ rating=>$rating });
1919 You can also set the value of the module's prefix to an array reference
1920 of C<<< { el => , value => } >>> hash-references, in which case duplicate
1921 elements are possible:
1923 $rss->add_item(title=>$title, link=>$link, my=> [
1924 {el => "rating", value => $rating1, },
1925 {el => "rating", value => $rating2, },
1928 Non-standard namespaces are not, however, currently accessible via a simple
1929 prefix; access them via their namespace URL like so:
1931 print $rss->{items}->[0]->{'http://purl.org/my/rss/module/'}->{rating};
1933 XML::RSS will continue to provide built-in support for standard RSS 1.0
1934 modules as they appear.
1936 =head1 Non-API Methods
1938 =head2 $rss->as_rss_0_9()
1940 B<WARNING>: this function is not an API function and should not be called
1941 directly. It is kept as is for backwards compatibility with legacy code. Use
1942 the following code instead:
1944 $rss->{output} = "0.9";
1945 my $text = $rss->as_string();
1947 This function renders the data in the object as an RSS version 0.9 feed,
1948 and returns the resultant XML as text.
1950 =head2 $rss->as_rss_0_9_1()
1952 B<WARNING>: this function is not an API function and should not be called
1953 directly. It is kept as is for backwards compatibility with legacy code. Use
1954 the following code instead:
1956 $rss->{output} = "0.91";
1957 my $text = $rss->as_string();
1959 This function renders the data in the object as an RSS version 0.91 feed,
1960 and returns the resultant XML as text.
1962 =head2 $rss->as_rss_1_0()
1964 B<WARNING>: this function is not an API function and should not be called
1965 directly. It is kept as is for backwards compatibility with legacy code. Use
1966 the following code instead:
1968 $rss->{output} = "1.0";
1969 my $text = $rss->as_string();
1971 This function renders the data in the object as an RSS version 1.0 feed,
1972 and returns the resultant XML as text.
1974 =head2 $rss->as_rss_2_0()
1976 B<WARNING>: this function is not an API function and should not be called
1977 directly. It is kept as is for backwards compatibility with legacy code. Use
1978 the following code instead:
1980 $rss->{output} = "2.0";
1981 my $text = $rss->as_string();
1983 This function renders the data in the object as an RSS version 2.0 feed,
1984 and returns the resultant XML as text.
1986 =head2 $rss->handle_char()
1988 Needed for XML::Parser. Don't use this directly.
1990 =head2 $rss->handle_dec()
1992 Needed for XML::Parser. Don't use this directly.
1994 =head2 $rss->handle_start()
1996 Needed for XML::Parser. Don't use this directly.
2000 Please use rt.cpan.org for tracking bugs. The list of current open
2002 L<http://rt.cpan.org/Dist/Display.html?Queue=XML-RSS>.
2004 To report a new bug, go to
2005 L<http://rt.cpan.org/Ticket/Create.html?Queue=XML-RSS>
2007 Please include a failing test in your bug report. I'd much rather
2008 have a well written test with the bug report than a patch.
2010 When you create diffs (for tests or patches), please use the C<-u>
2013 =head1 SOURCE AVAILABILITY
2015 The source is available from the perl.org Subversion server:
2017 L<http://svn.perl.org/modules/XML-RSS/>
2022 Original code: Jonathan Eisenzopf <eisen@pobox.com>
2024 Further changes: Rael Dornfest <rael@oreilly.com>, Ask Bjoern Hansen
2025 <ask@develooper.com>
2027 Currently: Shlomi Fish <shlomif@cpan.org>
2031 Copyright (c) 2001 Jonathan Eisenzopf <eisen@pobox.com> and Rael
2032 Dornfest <rael@oreilly.com>, Copyright (C) 2006-2007 Ask Bjoern Hansen
2033 <ask@develooper.com>.
2037 XML::RSS is free software. You can redistribute it and/or
2038 modify it under the same terms as Perl itself.
2042 Wojciech Zwiefka <wojtekz@cnt.pl>
2043 Chris Nandor <pudge@pobox.com>
2044 Jim Hebert <jim@cosource.com>
2045 Randal Schwartz <merlyn@stonehenge.com>
2047 Kellan Elliott-McCrea <kellan@protest.net>
2048 Rafe Colburn <rafe@rafe.us>
2049 Adam Trickett <atrickett@cpan.org>
2050 Aaron Straup Cope <asc@vineyard.net>
2051 Ian Davis <iand@internetalchemy.org>
2053 Shlomi Fish <shlomif@iglu.org.il>
2057 perl(1), XML::Parser(3).