factor out child code into PageChildren and add an all_paths method to PageSet
[scpubgit/SCS.git] / lib / SCSite / PageSet.pm
CommitLineData
95148a72 1package SCSite::PageSet;
2
95148a72 3use Text::MultiMarkdown 'markdown';
4use HTML::Zoom;
ebd4c292 5use Sub::Quote;
6use Syntax::Keyword::Gather;
7use SCSite::Page;
d01538bc 8use IO::All;
2a19b2a6 9use Try::Tiny;
d01538bc 10use JSON;
11use Moo;
12
cbb06a72 13with 'SCSite::PageChildren';
14
# One JSON codec is created when this block runs and is shared by every
# caller of _json; the enclosing block keeps the variable private.
{
  my $json_codec = JSON->new;

  # Returns the shared JSON codec object.
  sub _json {
    return $json_codec;
  }
}
95148a72 19
# Directory that site-relative paths are computed against. Built lazily by
# calling the base_dir accessor, so it defaults to base_dir when not supplied.
has top_dir => (
  is      => 'ro',
  lazy    => 1,
  builder => 'base_dir',
);

# Directory this page set reads its files from (mandatory).
has base_dir => (
  is       => 'ro',
  required => 1,
);

# Depth limits consumed by _all_files, which passes
# max_depth - (min_depth - 1) as the depth argument to all_files.
# NOTE(review): a max_depth of 0 presumably means "no limit" (IO::All's
# convention for a depth of 0) -- confirm.
has max_depth => (
  is      => 'ro',
  default => quote_sub q{ 0 },
);

has min_depth => (
  is      => 'ro',
  default => quote_sub q{ 1 },
);
95148a72 24
# Lazily computed directory object: '/' joined with the location of base_dir
# expressed relative to top_dir.
has rel_path => (is => 'lazy');

# Builder for rel_path.
sub _build_rel_path {
  my ($self) = @_;
  my $root = io->dir('/');
  return $root->catdir(
    File::Spec->abs2rel($self->base_dir->name, $self->top_dir->name)
  );
}
32
# Returns the class name of the invocant (as reported by ref).
sub _page_set_class {
  my ($self) = @_;
  return ref $self;
}
# Returns this set's top_dir attribute.
sub _top_dir {
  my ($self) = @_;
  return $self->top_dir;
}
# Returns this set's base_dir attribute.
sub _my_path {
  my ($self) = @_;
  return $self->base_dir;
}
36
# Look up a single page by path.
#
# $spec is a hashref whose 'path' entry is split into an optional directory
# part and a final file name. The directory (under base_dir) is searched for
# entries named "<file><type extension>" as defined by _types_re.
#
# Returns the inflated page, or undef when nothing matches.
# Dies when 'path' is missing/false or when more than one file matches.
sub get {
  my ($self, $spec) = @_;
  die "path is required to get" unless $spec->{path};
  my ($dir, $file) = $spec->{path} =~ m{^(?:(.*)/)?([^/]+)$};

  # Start from base_dir and descend only when the path had a directory part.
  my $search_dir = io->dir($self->base_dir);
  $search_dir = $search_dir->catdir($dir) if defined($dir);

  # The filter doubles as the place where the matched extension is captured.
  my $type;
  my $filtered = $search_dir->filter(sub {
    $_->filename =~ /^\Q${file}\E${\$self->_types_re}$/ and $type = $1
  });

  # Only list files when the directory actually exists.
  my @poss = -e "$filtered" ? $filtered->all_files : ();

  if (@poss > 1) {
    die "multiple files found for ${\$spec->{path}}:\n".join "\n", @poss;
  }
  return undef unless @poss;

  return $self->_inflate(
    $type, $self->rel_path->catdir($spec->{path}), $poss[0]
  );
}
56
# Turn a source file into a page object, going through a JSON cache.
#
#   $type - extension captured by _types_re; selects _extract_from_<type>
#   $path - path value handed to _new_page
#   $io   - file object for the source (must provide name, mtime and all)
#
# The cache file sits next to the source as ".htcache.<filename>.json". When
# it exists and is at least as new as the source, the page is built from the
# decoded cache. Otherwise the source is extracted, the cache is rewritten
# (best effort) and the page is built from the fresh data.
#
# Dies with "Error inflating ..." when decoding the cache, extracting the
# source or constructing the page fails.
sub _inflate {
  my ($self, $type, $path, $io) = @_;

  # Rewrite only the final path component. Anchoring on the component itself
  # (instead of requiring a preceding '/') keeps the cache name distinct from
  # the source name even when the name has no directory part.
  (my $cache_name = $io->name) =~ s{([^/]+)$}{.htcache.$1.json};
  my $cache = io($cache_name);

  if (-f $cache_name and $cache->mtime >= $io->mtime) {
    return try {
      $self->_new_page($path, $self->_json->decode($cache->all));
    } catch {
      die "Error inflating ${path} from cache: $_\n";
    };
  }

  my $raw = $io->all;
  try {
    my $extracted = $self->${\"_extract_from_${type}"}($raw);
    # Failing to write the cache is deliberately ignored; the page is still
    # built from the freshly extracted data.
    try { $cache->print($self->_json->encode($extracted)); };
    $self->_new_page($path, $extracted);
  } catch {
    die "Error inflating ${path} as ${type}: $_\n";
  };
}
79
# Apply $mapper to every page returned by flatten (each page is available as
# $_ while the callback runs) and return the collected results as an arrayref.
sub map {
  my ($self, $mapper) = @_;
  my @mapped;
  push @mapped, $mapper->($_) for $self->flatten;
  return \@mapped;
}
84
# Express $path relative to base_dir and split the result into its directory
# components. In list context the components are returned.
sub _depth_under_base {
  my ($self, $path) = @_;
  my ($relative) = File::Spec->abs2rel($path, $self->base_dir->name);
  return File::Spec->splitdir($relative);
}
89
# Inflate every file reported by _all_files into a page and return the list.
# Each page's path is the file name with its type extension removed, made
# relative to top_dir and re-rooted at '/'.
sub flatten {
  my ($self) = @_;
  my $root = io->dir('/');
  my @pages;
  for my $file ($self->_all_files) {
    # Split the file name into the extension-less stem and the page type.
    my ($stem, $type) = $file->name =~ /^(.*)${\$self->_types_re}$/;
    push @pages, $self->_inflate(
      $type,
      $root->catdir(File::Spec->abs2rel($stem, $self->top_dir->name)),
      $file
    );
  }
  return @pages;
}
102
# Return the site-relative path string for every file reported by _all_files,
# without inflating any page. Each path is the file name with its type
# extension removed, made relative to top_dir and re-rooted at '/'.
sub all_paths {
  my ($self) = @_;
  my $slash = io->dir('/');
  map {
    # Only the extension-less stem is needed here; the captured type that
    # follows it in the match result is deliberately discarded.
    my ($path) = $_->name =~ /^(.*)${\$self->_types_re}$/;
    $slash->catdir(File::Spec->abs2rel($path, $self->top_dir->name))->name
  } $self->_all_files;
}
111
# List the page source files under base_dir whose names end with an
# extension matched by _types_re, honouring min_depth and max_depth.
#
# Returns an empty list when base_dir does not exist, or when a non-zero
# max_depth is smaller than min_depth (an empty depth range).
sub _all_files {
  my ($self) = @_;
  my $base = $self->base_dir;
  return unless $base->exists;

  my $min = $self->min_depth;
  my $max = $self->max_depth;

  # With a non-zero max_depth below min_depth the depth computed further down
  # would be zero or negative.
  # NOTE(review): IO::All appears to treat such a depth as "unlimited", which
  # would return every deeper file instead of none -- confirm against IO::All.
  return if $max && $max < $min;

  # Descend min_depth-1 levels one level at a time: ->all_dirs($min-1) cannot
  # be used since only the directories of the final level are wanted.
  my @start_dirs = ($base);
  @start_dirs = map $_->all_dirs, @start_dirs for 2 .. $min;

  # Depth still available below the starting directories.
  my $depth = $max - ($min - 1);

  map {
    $_->filter(sub { $_->filename =~ /${\$self->_types_re}$/ })
      ->all_files($depth)
  } @start_dirs;
}
129
# Build an SCSite::LatestPageSet wrapping this set, limited to $max entries.
# The class is loaded on demand.
sub latest {
  my ($self, $max) = @_;
  require SCSite::LatestPageSet;
  my @args = (
    parent      => $self,
    max_entries => $max,
  );
  return SCSite::LatestPageSet->new(@args);
}
138
# Construct an SCSite::Page for $path that belongs to this set. The entries
# of the $fields hashref are merged in last, so they override path/page_set
# when the same key is present.
sub _new_page {
  my ($self, $path, $fields) = @_;
  my %args = (
    path     => $path,
    page_set => $self,
    %{$fields},
  );
  return SCSite::Page->new(\%args);
}
142
# Regex source fragment matching a supported file extension; the extension
# name (html or md) is captured.
sub _types_re {
  return '\.(html|md)';
}
144
# Pull page data out of an HTML document.
#
# Returns a hashref with the keys title, subtitle, description, keywords,
# created and body. title comes from the <title> content, body from the
# <body> content rendered back to HTML, and the rest from the content
# attribute of the matching <meta name=...> element. Anything that is
# missing (or false) becomes the empty string.
sub _extract_from_html {
  my ($self, $html) = @_;
  my @meta_names = qw(subtitle description keywords created);
  my (@title, @body);
  my %meta = map { ($_ => []) } @meta_names;

  # Register the collectors in a fixed order: title, each meta tag, body.
  my $zoom = HTML::Zoom->from_html($html)
    ->select('title')->collect_content({ into => \@title });
  for my $name (@meta_names) {
    $zoom = $zoom->select("meta[name=${name}]")
      ->collect({ into => $meta{$name} });
  }
  $zoom->select('body')->collect_content({ into => \@body })->run;

  return {
    title => $title[0]->{raw} || '',
    (map { ($_ => $meta{$_}[0]->{attrs}{content} || '') } @meta_names),
    body => HTML::Zoom->from_events(\@body)->to_html || '',
  };
}
164
# Pull page data out of Markdown: render it as a complete HTML document and
# hand that to _extract_from_html.
sub _extract_from_md {
  my ($self, $md) = @_;
  my %options = (document_format => 'complete');
  my ($html) = markdown($md, \%options);
  return $self->_extract_from_html($html);
}
169
1701;