error handling for page metadata caching
[scpubgit/SCS.git] / lib / SCSite / PageSet.pm
CommitLineData
95148a72 1package SCSite::PageSet;
2
95148a72 3use Text::MultiMarkdown 'markdown';
4use HTML::Zoom;
ebd4c292 5use Sub::Quote;
6use Syntax::Keyword::Gather;
7use SCSite::Page;
d01538bc 8use IO::All;
2a19b2a6 9use Try::Tiny;
d01538bc 10use JSON;
11use Moo;
12
# One JSON codec is created when the file is loaded and shared by every
# caller; the enclosing block keeps the variable private to _json.
{
    my $json_codec = JSON->new;

    # Returns the shared JSON encoder/decoder.
    sub _json {
        return $json_codec;
    }
}
95148a72 17
# Directory that page paths are reported relative to. Built lazily by
# calling base_dir when the caller does not supply one.
has top_dir => (
    is      => 'ro',
    lazy    => 1,
    builder => 'base_dir',
);

# Directory this page set reads its files from (mandatory).
has base_dir => (
    is       => 'ro',
    required => 1,
);

# Depth limit used by flatten when listing files; defaults to 0.
has max_depth => (
    is      => 'ro',
    default => quote_sub q{ 0 },
);

# Directory level below base_dir at which flatten starts listing files;
# defaults to 1 (base_dir itself).
has min_depth => (
    is      => 'ro',
    default => quote_sub q{ 1 },
);
95148a72 22
# Path of base_dir expressed relative to top_dir and rooted at "/".
# Computed on first access by _build_rel_path.
has rel_path => (is => 'lazy');

# Builder for rel_path: makes base_dir relative to top_dir, then hangs
# the result off a "/" directory object.
sub _build_rel_path {
    my ($self) = @_;
    my $relative = File::Spec->abs2rel(
        $self->base_dir->name,
        $self->top_dir->name,
    );
    return io->dir('/')->catdir($relative);
}
30
# Look up a single page by path.
#
# $spec is a hashref; $spec->{path} is the page path relative to base_dir,
# given without the file extension (the extension is matched against
# _types_re).
#
# Returns the inflated page, or undef when no matching file exists.
# Dies when the path is missing or when more than one file matches.
sub get {
    my ($self, $spec) = @_;
    $spec->{path} or die "path is required to get";

    my ($dir, $file) = $spec->{path} =~ m{^(?:(.*)/)?([^/]+)$};

    # A path with no final component (for example one ending in "/") cannot
    # name a page. Bail out here rather than searching with an undefined
    # file name, which would warn and match a file called ".html" or ".md".
    return undef unless defined $file;

    my $search_dir = io->dir($self->base_dir);
    $search_dir = $search_dir->catdir($dir) if defined $dir;

    # The filter runs while all_files walks the directory; it records the
    # extension of the matching file as a side effect.
    my $type;
    my $types_re = $self->_types_re;
    $search_dir->filter(sub {
        $_->filename =~ /^\Q${file}\E${types_re}$/ and $type = $1
    });

    my @poss = -e "$search_dir" ? $search_dir->all_files : ();

    die "multiple files found for ${\$spec->{path}}:\n".join "\n", @poss
        if @poss > 1;

    # Explicit undef (not a bare return) is kept for existing callers.
    return undef unless @poss;

    return $self->_inflate(
        $type, $self->rel_path->catdir($spec->{path}), $poss[0]
    );
}
50
# Build a page object for one source file, using a JSON metadata cache
# stored beside it as ".htcache.<filename>.json".
#
#   $type - file extension captured by _types_re; selects the
#           _extract_from_<type> method
#   $path - path handed to the page object
#   $io   - IO::All object for the source file
#
# Returns the page object. Dies with a message naming $path and $type when
# the source itself cannot be turned into a page.
sub _inflate {
    my ($self, $type, $path, $io) = @_;

    # Prefix the final path component. Not requiring a leading "/" means a
    # bare file name still gets a distinct cache name instead of the cache
    # path being identical to the source path.
    (my $cache_name = $io->name) =~ s{([^/]+)$}{.htcache.$1.json};

    if (-f $cache_name and io($cache_name)->mtime >= $io->mtime) {
        my $cached_page = try {
            $self->_new_page(
                $path, $self->_json->decode(io($cache_name)->all)
            );
        } catch {
            # The cache is only an optimisation: a truncated or corrupt
            # cache file must not make a readable source page unloadable.
            warn "Error inflating ${path} from cache, rebuilding: $_\n";
            undef;
        };
        return $cached_page if $cached_page;
    }

    my $raw = $io->all;
    return try {
        my $extracted = $self->${\"_extract_from_${type}"}($raw);

        # Writing the cache is deliberately best-effort; a failure here
        # (e.g. a read-only directory) must not stop the page loading.
        try { io($cache_name)->print($self->_json->encode($extracted)); };

        $self->_new_page($path, $extracted);
    } catch {
        die "Error inflating ${path} as ${type}: $_\n";
    };
}
73
# Apply $mapper to every page returned by flatten and collect the results.
# The page is available to $mapper both as its first argument and as $_.
# $mapper is called in list context, so it may return any number of values.
# Returns an array reference of everything $mapper returned, in order.
sub map {
    my ($self, $mapper) = @_;
    my @results;
    push @results, $mapper->($_) for $self->flatten;
    return \@results;
}
78
# Split $path, taken relative to base_dir, into its directory components.
# Returns whatever File::Spec->splitdir yields for that relative path.
sub _depth_under_base {
    my ($self, $path) = @_;
    my $relative = File::Spec->abs2rel($path, $self->base_dir->name);
    return File::Spec->splitdir($relative);
}
83
# Return every page found under base_dir as a list of inflated page
# objects (in scalar context, the number of pages).
#
# Returns an empty list when base_dir does not exist.
sub flatten {
    my ($self) = @_;
    my $base = $self->base_dir;
    return unless $base->exists;

    my $root     = io->dir('/');
    my $min      = $self->min_depth;
    my $types_re = $self->_types_re;

    # Step down min_depth - 1 directory levels, keeping only the directories
    # at the final level. ->all_dirs($min - 1) cannot be used because it
    # would also return the intermediate levels.
    my @dirs = ($base);
    for my $level (2 .. $min) {
        @dirs = map { $_->all_dirs } @dirs;
    }

    # List the recognised page files under each starting directory; the
    # depth limit is reduced by the levels already descended.
    my $depth = $self->max_depth - ($min - 1);
    my @files = map {
        $_->filter(sub { $_->filename =~ /${types_re}$/ })->all_files($depth)
    } @dirs;

    # Inflate each file; its page path is the file name minus the extension,
    # made relative to top_dir and rooted at "/".
    return map {
        my ($path, $type) = $_->name =~ /^(.*)${types_re}$/;
        $self->_inflate(
            $type,
            $root->catdir(File::Spec->abs2rel($path, $self->top_dir->name)),
            $_
        );
    } @files;
}
109
# Wrap this page set in an SCSite::LatestPageSet, passing $max through as
# its max_entries. The class is loaded on demand.
sub latest {
    my ($self, $max) = @_;
    require SCSite::LatestPageSet;
    my %args = (
        parent      => $self,
        max_entries => $max,
    );
    return SCSite::LatestPageSet->new(%args);
}
118
# Construct an SCSite::Page for $path belonging to this page set.
# $data is a hashref of further constructor arguments; because it is
# merged last, its keys win over the path and page_set set here.
sub _new_page {
    my ($self, $path, $data) = @_;
    my %args = (
        path     => $path,
        page_set => $self,
        %{$data},
    );
    return SCSite::Page->new(\%args);
}
122
# Regex source matching a recognised page extension; the extension itself
# (without the dot) is captured in group 1.
sub _types_re {
    return '\.(html|md)';
}
124
# Pull page metadata out of an HTML document.
#
# Collects the <title> content, the content attribute of the subtitle,
# description, keywords and created <meta> elements, and the content of
# <body>. Returns a hashref with those six keys; anything missing or false
# comes back as the empty string.
sub _extract_from_html {
    my ($self, $html) = @_;

    my (@title, @subtitle, @description, @keywords, @created, @body);

    HTML::Zoom->from_html($html)
        ->select('title')->collect_content({ into => \@title })
        ->select('meta[name=subtitle]')->collect({ into => \@subtitle })
        ->select('meta[name=description]')->collect({ into => \@description })
        ->select('meta[name=keywords]')->collect({ into => \@keywords })
        ->select('meta[name=created]')->collect({ into => \@created })
        ->select('body')->collect_content({ into => \@body })
        ->run;

    # content attribute of the first collected event, or '' if absent
    my $meta_content = sub {
        my ($events) = @_;
        return $events->[0]->{attrs}{content} || '';
    };

    my %extracted = (
        title       => $title[0]->{raw} || '',
        subtitle    => $meta_content->(\@subtitle),
        description => $meta_content->(\@description),
        keywords    => $meta_content->(\@keywords),
        created     => $meta_content->(\@created),
        body        => HTML::Zoom->from_events(\@body)->to_html || '',
    );
    return \%extracted;
}
144
# Pull page metadata out of a Markdown document: render it as a complete
# HTML document, then reuse the HTML extractor on the result.
sub _extract_from_md {
    my ($self, $md) = @_;
    my ($html) = markdown($md, { document_format => 'complete' });
    return $self->_extract_from_html($html);
}
149
1501;