lib/SCSite/PageSet.pm

   1 package SCSite::PageSet;
   2
use Text::MultiMarkdown 'markdown';
use HTML::Zoom;
use Sub::Quote;
use Syntax::Keyword::Gather;
use SCSite::Page;
use File::Spec;
use IO::All;
use Try::Tiny;
use JSON;
use Moo;
  12
  13 {
  14   my $j = JSON->new;
  15   sub _json { $j }
  16 }
  17
  18 has top_dir => (is => 'ro', lazy => 1, builder => 'base_dir');
  19 has base_dir => (is => 'ro', required => 1);
  20 has max_depth => (is => 'ro', default => quote_sub q{ 0 });
  21 has min_depth => (is => 'ro', default => quote_sub q{ 1 });
  22
  23 has rel_path => (is => 'lazy');
  24
  25 sub _build_rel_path {
  26   my ($self) = @_;
  27   io->dir('/')
  28     ->catdir(File::Spec->abs2rel($self->base_dir->name, $self->top_dir->name))
  29 }
  30
  31 sub get {
  32   my ($self, $spec) = @_;
  33   $spec->{path} or die "path is required to get";
  34   my ($dir, $file) = $spec->{path} =~ m{^(?:(.*)/)?([^/]+)$};
  35   my $type;
  36   my @poss = io->dir($self->base_dir)->${\sub {
  37     my $io = shift;
  38     defined($dir) ? $io->catdir($dir) : $io
  39   }}->filter(sub {
  40         $_->filename =~ /^\Q${file}\E${\$self->_types_re}$/ and $type = $1
  41       })
  42     ->${\sub { -e "$_[0]" ? $_[0]->all_files : () }};
  43   die "multiple files found for ${\$spec->{path}}:\n".join "\n", @poss
  44     if @poss > 1;
  45   return undef unless @poss;
  46   $self->_inflate(
  47     $type, $self->rel_path->catdir($spec->{path}), $poss[0]
  48   );
  49 }
  50
  51 sub _inflate {
  52   my ($self, $type, $path, $io) = @_;
  53   (my $cache_name = $io->name) =~ s/\/([^\/]+)$/\/.htcache.$1.json/;
  54   my $cache = io($cache_name);
  55   if (-f $cache_name) {
  56     if ($cache->mtime >= $io->mtime) {
  57       return try {
  58         $self->_new_page($path, $self->_json->decode($cache->all));
  59       } catch {
  60         die "Error inflating ${path} from cache: $_\n";
  61       }
  62     }
  63   }
  64   my $raw = $io->all;
  65   try {
  66     my $extracted = $self->${\"_extract_from_${type}"}($raw);
  67     try { $cache->print($self->_json->encode($extracted)); };
  68     $self->_new_page($path, $extracted);
  69   } catch {
  70     die "Error inflating ${path} as ${type}: $_\n";
  71   }
  72 }
  73
  74 sub map {
  75   my ($self, $mapper) = @_;
  76   [ map $mapper->($_), $self->flatten ]
  77 }
  78
  79 sub _depth_under_base {
  80   my ($self, $path) = @_;
  81   File::Spec->splitdir(File::Spec->abs2rel($path, $self->base_dir->name))
  82 }
  83
  84 sub flatten {
  85   my ($self) = @_;
  86   return unless (my $base = $self->base_dir)->exists;
  87   my %seen;
  88   my $slash = io->dir('/');
  89   my $min = $self->min_depth;
  90   map {
  91     my ($path, $type) = $_->name =~ /^(.*)${\$self->_types_re}$/;
  92     $self->_inflate(
  93       $type,
  94       $slash->catdir(File::Spec->abs2rel($path, $self->top_dir->name)),
  95       $_
  96     );
  97   } map {
  98     $_->filter(sub { $_->filename =~ /${\$self->_types_re}$/ })
  99       ->all_files($self->max_depth - ($min-1))
 100   } map
 101       $min > 1
 102         ? do {
 103             # can't use ->all_dirs($min-1) since we only want the final level
 104             my @x = ($_); @x = map $_->all_dirs, @x for 1..$min-1; @x
 105           }
 106         : $_,
 107       $base;
 108 }
 109
 110 sub latest {
 111   my ($self, $max) = @_;
 112   require SCSite::LatestPageSet;
 113   SCSite::LatestPageSet->new(
 114     parent => $self,
 115     max_entries => $max,
 116   );
 117 }
 118
 119 sub _new_page {
 120   SCSite::Page->new({ path => $_[1], page_set => $_[0], %{$_[2]} })
 121 }
 122
 123 sub _types_re { qw/\.(html|md)/ }
 124
 125 sub _extract_from_html {
 126   my ($self, $html) = @_;
 127   HTML::Zoom->from_html($html)
 128     ->select('title')->collect_content({ into => \my @title })
 129     ->select('meta[name=subtitle]')->collect({ into => \my @subtitle })
 130     ->select('meta[name=description]')->collect({ into => \my @description })
 131     ->select('meta[name=keywords]')->collect({ into => \my @keywords })
 132     ->select('meta[name=created]')->collect({ into => \my @created })
 133     ->select('body')->collect_content({ into => \my @body })
 134     ->run;
 135   +{
 136     title => $title[0]->{raw}||'',
 137     subtitle => $subtitle[0]->{attrs}{content}||'',
 138     description => $description[0]->{attrs}{content}||'',
 139     keywords => $keywords[0]->{attrs}{content}||'',
 140     created => $created[0]->{attrs}{content}||'',
 141     body => HTML::Zoom->from_events(\@body)->to_html||'',
 142   }
 143 }
 144
 145 sub _extract_from_md {
 146   my ($self, $md) = @_;
 147   $self->_extract_from_html(markdown($md, { document_format => 'complete' }));
 148 }
 149
 150 1;