[scpubgit/SCS.git] / lib / SCSite / PageSet.pm

package SCSite::PageSet;

use Text::MultiMarkdown 'markdown';
use HTML::Zoom;
use Sub::Quote;
use Syntax::Keyword::Gather;
use SCSite::Page;
use IO::All;
use Try::Tiny;
use JSON;
use Moo;

# A single JSON codec is created once and shared by every PageSet. The
# lexical lives in this bare block so it is only reachable through _json.
{
  my $codec = JSON->new;

  # Returns the shared JSON object used to encode and decode cache files.
  sub _json {
    return $codec;
  }
}

# Directory that page paths are reported relative to. When it is not passed
# to the constructor it is built lazily by calling the base_dir accessor, so
# it defaults to base_dir.
has top_dir => (
  is      => 'ro',
  lazy    => 1,
  builder => 'base_dir',
);

# Directory the pages are read from (mandatory). The methods in this class
# call ->name and ->exists on it, so it is presumably an IO::All directory
# object -- confirm against callers.
has base_dir => (
  is       => 'ro',
  required => 1,
);

# Depth bounds used by flatten: the search starts min_depth - 1 directory
# levels below base_dir, and all_files is called with
# max_depth - (min_depth - 1).
has max_depth => (
  is      => 'ro',
  default => quote_sub q{ 0 },
);
has min_depth => (
  is      => 'ro',
  default => quote_sub q{ 1 },
);

# Location of base_dir relative to top_dir; see _build_rel_path.
has rel_path => (is => 'lazy');

# Builder for the lazy rel_path attribute: the position of base_dir relative
# to top_dir, re-rooted under '/'.
sub _build_rel_path {
  my ($self) = @_;
  my $relative = File::Spec->abs2rel(
    $self->base_dir->name,
    $self->top_dir->name,
  );
  return io->dir('/')->catdir($relative);
}

# Fetch a single page by path.
#
# $spec is a hashref whose 'path' entry names the page relative to base_dir
# without its file extension; the extension is matched against _types_re.
#
# Returns the inflated page, or undef when no file matches.
# Dies when 'path' is missing/false or when more than one file matches.
sub get {
  my ($self, $spec) = @_;
  $spec->{path} or die "path is required to get";

  # Split "some/dir/leaf" into an optional directory part and the leaf name.
  my ($dir, $file) = $spec->{path} =~ m{^(?:(.*)/)?([^/]+)$};

  my $search_dir = io->dir($self->base_dir);
  $search_dir = $search_dir->catdir($dir) if defined($dir);

  # The filter also records which extension matched, as a side effect.
  my $type;
  my $filtered = $search_dir->filter(sub {
    $_->filename =~ /^\Q${file}\E${\$self->_types_re}$/ and $type = $1
  });

  # Only list the directory when it actually exists.
  my @poss = -e "$filtered" ? $filtered->all_files : ();

  if (@poss > 1) {
    die "multiple files found for ${\$spec->{path}}:\n".join("\n", @poss);
  }
  return undef unless @poss;

  my $page_path = $self->rel_path->catdir($spec->{path});
  return $self->_inflate($type, $page_path, $poss[0]);
}

# Turn a source file into a page object, going through a JSON cache.
#
#   $type - file type; selects the _extract_from_<type> method
#   $path - page path handed on to _new_page
#   $io   - object for the source file (->name, ->mtime and ->all are used)
#
# The cache sits next to the source file as ".htcache.<filename>.json". It is
# used when it exists and its mtime is not older than the source's mtime;
# otherwise the source is read, extracted, and the cache is rewritten.
#
# Dies with an "Error inflating ..." message when building the page from the
# cache fails, or when extraction / page construction fails.
sub _inflate {
  my ($self, $type, $path, $io) = @_;

  (my $cache_name = $io->name) =~ s/\/([^\/]+)$/\/.htcache.$1.json/;
  my $cache = io($cache_name);

  if (-f $cache_name and $cache->mtime >= $io->mtime) {
    return try {
      my $cached = $self->_json->decode($cache->all);
      $self->_new_page($path, $cached);
    } catch {
      die "Error inflating ${path} from cache: $_\n";
    };
  }

  my $raw = $io->all;
  my $extractor = "_extract_from_${type}";
  return try {
    my $extracted = $self->$extractor($raw);
    # Writing the cache is best effort: any failure here is ignored.
    try { $cache->print($self->_json->encode($extracted)); };
    $self->_new_page($path, $extracted);
  } catch {
    die "Error inflating ${path} as ${type}: $_\n";
  };
}

# Apply $mapper to every page returned by flatten and return an arrayref of
# everything it produced. $mapper receives the page as its first argument
# (also available as $_) and is called in list context.
sub map {
  my ($self, $mapper) = @_;
  my @results;
  push @results, $mapper->($_) for $self->flatten;
  return \@results;
}

# Split $path, taken relative to base_dir, into its directory components.
# In list context this returns the components; the result comes straight
# from File::Spec->splitdir, so scalar-context behaviour is whatever
# splitdir gives.
sub _depth_under_base {
  my ($self, $path) = @_;
  my $relative = File::Spec->abs2rel($path, $self->base_dir->name);
  return File::Spec->splitdir($relative);
}

# Inflate every page file under base_dir and return the pages as a list.
#
# Returns nothing (empty list / undef in scalar context) when base_dir does
# not exist.
#
# Selection rules:
#   * only files whose name ends in one of the _types_re extensions count;
#   * the search starts min_depth - 1 directory levels below base_dir;
#   * all_files is called with a depth of max_depth - (min_depth - 1)
#     (presumably IO::All treats a depth of 0 as "no limit" -- confirm).
#
# Each page's path is the file name minus its extension, made relative to
# top_dir and re-rooted under '/'.
sub flatten {
  my ($self) = @_;
  my $base = $self->base_dir;
  return unless $base->exists;

  my $min   = $self->min_depth;
  my $slash = io->dir('/');

  # Compile the extension patterns once rather than for every file visited.
  my $types     = $self->_types_re;
  my $suffix_re = qr/${types}$/;
  my $split_re  = qr/^(.*)${types}$/;

  # Walk down min_depth - 1 levels, keeping only the directories of the
  # final level (->all_dirs($min-1) would also return the levels in between).
  my @roots = ($base);
  for my $level (1 .. $min - 1) {
    @roots = map { $_->all_dirs } @roots;
  }

  my $depth = $self->max_depth - ($min - 1);
  my @files = map {
    $_->filter(sub { $_->filename =~ $suffix_re })->all_files($depth)
  } @roots;

  return map {
    my $file = $_;
    my ($path, $type) = $file->name =~ $split_re;
    $self->_inflate(
      $type,
      $slash->catdir(File::Spec->abs2rel($path, $self->top_dir->name)),
      $file
    );
  } @files;
}

# Wrap this set in an SCSite::LatestPageSet with this set as its parent and
# $max as its max_entries. The class is loaded on demand.
sub latest {
  my ($self, $max) = @_;
  require SCSite::LatestPageSet;
  my @args = (
    parent      => $self,
    max_entries => $max,
  );
  return SCSite::LatestPageSet->new(@args);
}

# Construct an SCSite::Page for $path belonging to this set. $attrs is a
# hashref of further constructor arguments; because it is expanded last, a
# 'path' or 'page_set' key inside it overrides the values supplied here.
sub _new_page {
  my ($self, $path, $attrs) = @_;
  return SCSite::Page->new({
    path     => $path,
    page_set => $self,
    %{$attrs},
  });
}

# Regex source (a string, not a compiled pattern) matching a supported file
# extension; the extension without its dot is the single capture group.
sub _types_re {
  return '\.(html|md)';
}

# Pull the page attributes out of an HTML document.
#
# Returns a hashref with the keys title, subtitle, description, keywords,
# created and body. title comes from the first collected event of <title>'s
# content; subtitle, description, keywords and created come from the
# content attribute of the matching <meta name=...> element; body is the
# re-serialised content of <body>. Any value that is missing or false
# becomes the empty string.
sub _extract_from_html {
  my ($self, $html) = @_;

  my (@title, @subtitle, @description, @keywords, @created, @body);
  HTML::Zoom->from_html($html)
    ->select('title')->collect_content({ into => \@title })
    ->select('meta[name=subtitle]')->collect({ into => \@subtitle })
    ->select('meta[name=description]')->collect({ into => \@description })
    ->select('meta[name=keywords]')->collect({ into => \@keywords })
    ->select('meta[name=created]')->collect({ into => \@created })
    ->select('body')->collect_content({ into => \@body })
    ->run;

  # content attribute of the first collected event, or '' when absent
  my $meta_content = sub {
    my ($events) = @_;
    return $events->[0]->{attrs}{content} || '';
  };

  my $body_html = HTML::Zoom->from_events(\@body)->to_html || '';

  return +{
    title       => $title[0]->{raw} || '',
    subtitle    => $meta_content->(\@subtitle),
    description => $meta_content->(\@description),
    keywords    => $meta_content->(\@keywords),
    created     => $meta_content->(\@created),
    body        => $body_html,
  };
}

# Extract page attributes from Markdown source: render it to a complete
# HTML document first, then reuse the HTML extractor on the result.
sub _extract_from_md {
  my ($self, $md) = @_;
  my ($html) = markdown($md, { document_format => 'complete' });
  return $self->_extract_from_html($html);
}

1;
Commit	Line	Data
95148a72	1	package SCSite::PageSet;
95148a72	2
95148a72	3	use Text::MultiMarkdown 'markdown';
95148a72	4	use HTML::Zoom;
ebd4c292	5	use Sub::Quote;
	6	use Syntax::Keyword::Gather;
	7	use SCSite::Page;
d01538bc	8	use IO::All;
2a19b2a6	9	use Try::Tiny;
d01538bc	10	use JSON;
	11	use Moo;
	12
	13	{
	14	my $j = JSON->new;
	15	sub _json { $j }
	16	}
95148a72	17
34597fb2	18	has top_dir => (is => 'ro', lazy => 1, builder => 'base_dir');
95148a72	19	has base_dir => (is => 'ro', required => 1);
ebd4c292	20	has max_depth => (is => 'ro', default => quote_sub q{ 0 });
221c4151	21	has min_depth => (is => 'ro', default => quote_sub q{ 1 });
95148a72	22
34597fb2	23	has rel_path => (is => 'lazy');
	24
	25	sub _build_rel_path {
	26	my ($self) = @_;
	27	io->dir('/')
	28	->catdir(File::Spec->abs2rel($self->base_dir->name, $self->top_dir->name))
	29	}
	30
95148a72	31	sub get {
	32	my ($self, $spec) = @_;
	33	$spec->{path} or die "path is required to get";
	34	my ($dir, $file) = $spec->{path} =~ m{^(?:(.*)/)?([^/]+)$};
	35	my $type;
5cc6d9e2	36	my @poss = io->dir($self->base_dir)->${\sub {
95148a72	37	my $io = shift;
95148a72	38	defined($dir) ? $io->catdir($dir) : $io
ebd4c292	39	}}->filter(sub {
	40	$_->filename =~ /^\Q${file}\E${\$self->_types_re}$/ and $type = $1
	41	})
221c4151	42	->${\sub { -e "$_[0]" ? $_[0]->all_files : () }};
95148a72	43	die "multiple files found for ${\$spec->{path}}:\n".join "\n", @poss
95148a72	44	if @poss > 1;
ebd4c292	45	return undef unless @poss;
2a19b2a6	46	$self->_inflate(
d01538bc	47	$type, $self->rel_path->catdir($spec->{path}), $poss[0]
34597fb2	48	);
ebd4c292	49	}
ebd4c292	50
2a19b2a6	51	sub _inflate {
d01538bc	52	my ($self, $type, $path, $io) = @_;
	53	(my $cache_name = $io->name) =~ s/\/([^\/]+)$/\/.htcache.$1.json/;
	54	my $cache = io($cache_name);
	55	if (-f $cache_name) {
	56	if ($cache->mtime >= $io->mtime) {
4e3f5bd2	57	return try {
	58	$self->_new_page($path, $self->_json->decode($cache->all));
	59	} catch {
	60	die "Error inflating ${path} from cache: $_\n";
	61	}
d01538bc	62	}
	63	}
	64	my $raw = $io->all;
	65	try {
	66	my $extracted = $self->${\"_extract_from_${type}"}($raw);
4e3f5bd2	67	try { $cache->print($self->_json->encode($extracted)); };
d01538bc	68	$self->_new_page($path, $extracted);
d01538bc	69	} catch {
4e3f5bd2	70	die "Error inflating ${path} as ${type}: $_\n";
2a19b2a6	71	}
	72	}
	73
ebd4c292	74	sub map {
	75	my ($self, $mapper) = @_;
	76	[ map $mapper->($_), $self->flatten ]
	77	}
	78
fc436d2e	79	sub _depth_under_base {
	80	my ($self, $path) = @_;
	81	File::Spec->splitdir(File::Spec->abs2rel($path, $self->base_dir->name))
	82	}
	83
ebd4c292	84	sub flatten {
ebd4c292	85	my ($self) = @_;
fc436d2e	86	return unless (my $base = $self->base_dir)->exists;
ebd4c292	87	my %seen;
34597fb2	88	my $slash = io->dir('/');
fc436d2e	89	my $min = $self->min_depth;
ebd4c292	90	map {
ebd4c292	91	my ($path, $type) = $_->name =~ /^(.*)${\$self->_types_re}$/;
2a19b2a6	92	$self->_inflate(
2a19b2a6	93	$type,
fc436d2e	94	$slash->catdir(File::Spec->abs2rel($path, $self->top_dir->name)),
d01538bc	95	$_
ebd4c292	96	);
fc436d2e	97	} map {
fc436d2e	98	$_->filter(sub { $_->filename =~ /${\$self->_types_re}$/ })
221c4151	99	->all_files($self->max_depth - ($min-1))
fc436d2e	100	} map
221c4151	101	$min > 1
fc436d2e	102	? do {
221c4151	103	# can't use ->all_dirs($min-1) since we only want the final level
221c4151	104	my @x = ($_); @x = map $_->all_dirs, @x for 1..$min-1; @x
fc436d2e	105	}
fc436d2e	106	: $_,
221c4151	107	$base;
ebd4c292	108	}
	109
	110	sub latest {
	111	my ($self, $max) = @_;
	112	require SCSite::LatestPageSet;
	113	SCSite::LatestPageSet->new(
	114	parent => $self,
	115	max_entries => $max,
	116	);
95148a72	117	}
	118
	119	sub _new_page {
ebd4c292	120	SCSite::Page->new({ path => $_[1], page_set => $_[0], %{$_[2]} })
95148a72	121	}
95148a72	122
ebd4c292	123	sub _types_re { qw/\.(html\|md)/ }
ebd4c292	124
95148a72	125	sub _extract_from_html {
	126	my ($self, $html) = @_;
	127	HTML::Zoom->from_html($html)
	128	->select('title')->collect_content({ into => \my @title })
2a19b2a6	129	->select('meta[name=subtitle]')->collect({ into => \my @subtitle })
95148a72	130	->select('meta[name=description]')->collect({ into => \my @description })
95148a72	131	->select('meta[name=keywords]')->collect({ into => \my @keywords })
ebd4c292	132	->select('meta[name=created]')->collect({ into => \my @created })
95148a72	133	->select('body')->collect_content({ into => \my @body })
	134	->run;
	135	+{
ebd4c292	136	title => $title[0]->{raw}\|\|'',
2a19b2a6	137	subtitle => $subtitle[0]->{attrs}{content}\|\|'',
ebd4c292	138	description => $description[0]->{attrs}{content}\|\|'',
	139	keywords => $keywords[0]->{attrs}{content}\|\|'',
	140	created => $created[0]->{attrs}{content}\|\|'',
	141	body => HTML::Zoom->from_events(\@body)->to_html\|\|'',
95148a72	142	}
	143	}
	144
95148a72	145	sub _extract_from_md {
	146	my ($self, $md) = @_;
	147	$self->_extract_from_html(markdown($md, { document_format => 'complete' }));
	148	}
	149
	150	1;