lib/SCSite/PageSet.pm

   1 package SCSite::PageSet;
   2
   3 use Text::MultiMarkdown 'markdown';
   4 use HTML::Zoom;
   5 use Sub::Quote;
   6 use Syntax::Keyword::Gather;
   7 use SCSite::Page;
   8 use IO::All;
   9 use Try::Tiny;
  10 use JSON;
  11 use Moo;
  12
  13 with 'SCSite::PageChildren';
  14
  # Module-wide JSON codec. A single JSON object is created when this block
  # runs at load time, and _json hands that same instance back on every call.
  {
    my $codec = JSON->new;

    sub _json {
      return $codec;
    }
  }
  19
  # Directory that page paths are expressed relative to (see _build_rel_path
  # and flatten). It is built lazily by calling base_dir, so it falls back to
  # this set's own directory when the constructor is not given one.
  has top_dir => (
    is      => 'ro',
    lazy    => 1,
    builder => 'base_dir',
  );

  # Directory this page set reads from; must be passed to the constructor.
  # NOTE(review): it is used via ->name and ->exists, so presumably an IO::All
  # directory object - confirm against callers.
  has base_dir => (
    is       => 'ro',
    required => 1,
  );

  # Depth limits consumed by _all_files. min_depth selects the directory level
  # to start from; max_depth (minus min_depth - 1) is handed to IO::All's
  # all_files as its depth argument.
  has max_depth => (
    is      => 'ro',
    default => quote_sub(q{ 0 }),
  );
  has min_depth => (
    is      => 'ro',
    default => quote_sub(q{ 1 }),
  );

  # base_dir relative to top_dir, rooted at '/'; computed on first use by
  # _build_rel_path.
  has rel_path => (
    is => 'lazy',
  );
  26
  # Builder for rel_path: expresses base_dir relative to top_dir and anchors
  # the result under the '/' directory.
  sub _build_rel_path {
    my ($self) = @_;
    my $base_name = $self->base_dir->name;
    my $top_name  = $self->top_dir->name;
    return io->dir('/')->catdir(File::Spec->abs2rel($base_name, $top_name));
  }
  32
  # Class name of the invocant, as reported by ref().
  sub _page_set_class {
    my ($self) = @_;
    return ref $self;
  }
  # Private alias for the top_dir accessor.
  sub _top_dir {
    my ($self) = @_;
    return $self->top_dir;
  }
  # Private alias for the base_dir accessor.
  sub _my_path {
    my ($self) = @_;
    return $self->base_dir;
  }
  36
  # Look up a single page by its path relative to this set.
  #
  # $spec is a hashref; $spec->{path} (required) has the form "dir/.../name"
  # with no file extension. The directory part, if any, is resolved under
  # base_dir, and the final segment is matched against "<name>.html" and
  # "<name>.md" (the extensions accepted by _types_re).
  #
  # Returns the inflated page, or undef when nothing matches - including when
  # the path has no usable final segment (for example it ends in "/").
  # Dies when path is missing or when more than one file matches.
  #
  # NOTE(review): path is not checked for ".." segments; if it can come from
  # untrusted input, confirm that callers sanitise it first.
  sub get {
    my ($self, $spec) = @_;
    $spec->{path} or die "path is required to get";
    my ($dir, $file) = $spec->{path} =~ m{^(?:(.*)/)?([^/]+)$};

    # Without this guard an unparseable path leaves $file undef, and the
    # pattern below would degrade to matching bare ".html" / ".md" files.
    return undef unless defined $file;

    # Built once: the candidate pattern is the same for every file examined.
    my $wanted = qr/^\Q${file}\E${\$self->_types_re}$/;
    my $type;

    my $search = io->dir($self->base_dir);
    $search = $search->catdir($dir) if defined $dir;

    # The filter records the matched extension in $type as a side effect.
    my $filtered = $search->filter(sub {
      $_->filename =~ $wanted and $type = $1
    });

    # A directory that does not exist simply means "no such page".
    my @poss = -e "$filtered" ? $filtered->all_files : ();

    die "multiple files found for $spec->{path}:\n" . join("\n", @poss)
      if @poss > 1;
    return undef unless @poss;

    return $self->_inflate(
      $type, $self->rel_path->catdir($spec->{path}), $poss[0]
    );
  }
  56
  # Turn a source file into a page object, going through a JSON cache.
  #
  #   $type - extension name; selects the _extract_from_<type> method
  #   $path - site path handed to _new_page
  #   $io   - IO::All-style object for the source file (->name, ->mtime, ->all)
  #
  # The cache lives beside the source as ".htcache.<filename>.json". It is used
  # when it is a regular file whose mtime is not older than the source's;
  # otherwise the source is extracted afresh and the cache rewritten.
  # Any failure is rethrown with the path (and type) prefixed.
  sub _inflate {
    my ($self, $type, $path, $io) = @_;

    my $cache_name = $io->name;
    $cache_name =~ s/\/([^\/]+)$/\/.htcache.$1.json/;
    my $cache = io($cache_name);

    if (-f $cache_name and $cache->mtime >= $io->mtime) {
      return try {
        my $cached = $self->_json->decode($cache->all);
        $self->_new_page($path, $cached);
      } catch {
        die "Error inflating ${path} from cache: $_\n";
      };
    }

    my $raw = $io->all;
    return try {
      my $extractor = "_extract_from_${type}";
      my $extracted = $self->$extractor($raw);
      # Best effort only: a failed cache write must not fail the page.
      try { $cache->print($self->_json->encode($extracted)) };
      $self->_new_page($path, $extracted);
    } catch {
      die "Error inflating ${path} as ${type}: $_\n";
    };
  }
  79
  # Apply $mapper to every page returned by flatten and collect the results.
  # The page is passed as the first argument (and is also visible as $_).
  # Returns an array reference of everything the mapper returned.
  sub map {
    my ($self, $mapper) = @_;
    my @mapped;
    push @mapped, $mapper->($_) for $self->flatten;
    return \@mapped;
  }
  84
  # Split $path, taken relative to base_dir, into its directory components.
  # The result comes straight from File::Spec->splitdir, so the caller's
  # context decides what is returned.
  sub _depth_under_base {
    my ($self, $path) = @_;
    my $base_name = $self->base_dir->name;
    my $relative  = File::Spec->abs2rel($path, $base_name);
    return File::Spec->splitdir($relative);
  }
  89
  # Inflate every page file found by _all_files. Each page's path is the file
  # name without its extension, relative to top_dir and rooted at '/'.
  # Returns the list of pages.
  sub flatten {
    my ($self) = @_;
    my $root = io->dir('/');
    my @pages;
    for my $file ($self->_all_files) {
      my ($stem, $type) = $file->name =~ /^(.*)${\$self->_types_re}$/;
      push @pages, $self->_inflate(
        $type,
        $root->catdir(File::Spec->abs2rel($stem, $self->top_dir->name)),
        $file,
      );
    }
    return @pages;
  }
 102
 # List the site path of every page file found by _all_files without
 # inflating the pages. Each path is the file name minus its extension,
 # relative to top_dir and rooted at '/', returned as a plain string.
 sub all_paths {
   my ($self) = @_;
   my $slash = io->dir('/');
   # Fetched once: the extension pattern is the same for every file.
   my $types_re = $self->_types_re;
   return map {
     # Only the stem is needed here; the captured extension is ignored.
     my ($path) = $_->name =~ /^(.*)${types_re}$/;
     $slash->catdir(File::Spec->abs2rel($path, $self->top_dir->name))->name
   } $self->_all_files;
 }
 111
 # Collect the page files (names ending in an extension from _types_re) that
 # lie between min_depth and max_depth below base_dir.
 #
 # Returns an empty list when base_dir does not exist. Note that the filter is
 # installed on each starting directory object, which for min_depth 1 is the
 # base_dir object itself.
 sub _all_files {
   my ($self) = @_;
   my $base = $self->base_dir;
   return unless $base->exists;

   my $min = $self->min_depth;
   my $max = $self->max_depth;

   # IO::All documents a depth of 0 as "search as deep as possible". Pass that
   # through explicitly when max_depth is 0 instead of the negative number the
   # subtraction would give for min_depth > 1.
   # NOTE(review): a non-zero max_depth below min_depth still produces a depth
   # <= 0 here (i.e. unlimited) - confirm whether that should be empty instead.
   my $depth = $max ? $max - ($min - 1) : 0;

   # can't use ->all_dirs($min-1) since we only want the final level
   my @roots = ($base);
   @roots = map $_->all_dirs, @roots for 1 .. $min - 1;

   my $types_re = $self->_types_re;
   return map {
     $_->filter(sub { $_->filename =~ /${types_re}$/ })->all_files($depth)
   } @roots;
 }
 129
 # Wrap this set in an SCSite::LatestPageSet with $max passed as max_entries.
 # The class is loaded on demand.
 sub latest {
   my ($self, $max) = @_;
   require SCSite::LatestPageSet;
   my @args = (parent => $self, max_entries => $max);
   return SCSite::LatestPageSet->new(@args);
 }
 138
 # Build an SCSite::Page for $path that belongs to this set. Keys in $fields
 # (a hashref) are merged last, so they override path and page_set.
 sub _new_page {
   my ($self, $path, $fields) = @_;
   my %args = (path => $path, page_set => $self, %{$fields});
   return SCSite::Page->new(\%args);
 }
 142
 # Regex source (as a string) matching the supported page file extensions.
 # Its single capture group yields the type name, "html" or "md".
 sub _types_re {
   return '\.(html|md)';
 }
 144
 # Pull the page fields out of an HTML document with HTML::Zoom.
 #
 # Collects the content of <title> and <body>, plus the "content" attribute of
 # the subtitle, description, keywords and created <meta> elements. Returns a
 # hashref with keys title, subtitle, description, keywords, created and body;
 # anything missing (or false) becomes the empty string.
 sub _extract_from_html {
   my ($self, $html) = @_;
   my (@title, @body);
   my @meta_names  = qw(subtitle description keywords created);
   my %meta_events = map { ($_ => []) } @meta_names;

   # Selectors are attached in the same order: title, metas, body.
   my $zoom = HTML::Zoom->from_html($html)
     ->select('title')->collect_content({ into => \@title });
   for my $name (@meta_names) {
     $zoom = $zoom->select("meta[name=${name}]")
       ->collect({ into => $meta_events{$name} });
   }
   $zoom->select('body')->collect_content({ into => \@body })->run;

   my %fields = (
     title => $title[0]->{raw} || '',
     body  => HTML::Zoom->from_events(\@body)->to_html || '',
   );
   for my $name (@meta_names) {
     $fields{$name} = $meta_events{$name}[0]{attrs}{content} || '';
   }
   return \%fields;
 }
 164
 # Render Markdown source as a complete HTML document, then extract the page
 # fields from that HTML via _extract_from_html.
 sub _extract_from_md {
   my ($self, $md) = @_;
   my %markdown_opts = (document_format => 'complete');
   my ($html) = markdown($md, \%markdown_opts);
   return $self->_extract_from_html($html);
 }
 169
 170 1;