[scpubgit/SCS.git] / lib / SCSite / PageSet.pm

package SCSite::PageSet;

use Text::MultiMarkdown 'markdown';
use HTML::Zoom;
use Sub::Quote;
use Syntax::Keyword::Gather;
use SCSite::Page;
use IO::All;
use Try::Tiny;
use JSON;
use Moo;

# Shared JSON codec: a single JSON object is created when this file is loaded
# and handed out by _json, so every caller reuses the same encoder/decoder.
{
  my $codec = JSON->new;

  sub _json {
    return $codec;
  }
}

# Root directory that page paths are made relative to (see flatten and
# _build_rel_path). Built lazily by calling the base_dir accessor, so it
# defaults to base_dir when not supplied.
has top_dir => (
  is      => 'ro',
  lazy    => 1,
  builder => 'base_dir',
);

# Directory this page set reads from (required). The methods in this class
# call ->name and ->exists on it, so presumably an IO::All directory object --
# TODO confirm against callers.
has base_dir => (
  is       => 'ro',
  required => 1,
);

# Depth limit that flatten passes on (adjusted by min_depth) when listing
# files. Defaults to 0.
# NOTE(review): 0 appears to mean "no limit" for IO::All's all_files -- confirm.
has max_depth => (
  is      => 'ro',
  default => quote_sub(q{ 0 }),
);

# flatten descends (min_depth - 1) directory levels below base_dir before it
# starts collecting files. Defaults to 1, i.e. start at base_dir itself.
has min_depth => (
  is      => 'ro',
  default => quote_sub(q{ 1 }),
);

# Location of base_dir relative to top_dir; computed on first use by
# _build_rel_path.
has rel_path => (
  is => 'lazy',
);

# Builder for the lazy rel_path attribute: takes the name of base_dir relative
# to the name of top_dir and appends it to an io->dir('/') directory object.
sub _build_rel_path {
  my ($self) = @_;
  my $root = io->dir('/');
  my $below_top
    = File::Spec->abs2rel($self->base_dir->name, $self->top_dir->name);
  return $root->catdir($below_top);
}

# Look up the single page identified by $spec->{path}, a path below base_dir
# given without its file extension.
#
# Dies when no path is supplied, or when more than one file matches the path
# combined with an extension accepted by _types_re. Returns undef when nothing
# matches, otherwise whatever _inflate returns for the matching file.
sub get {
  my ($self, $spec) = @_;
  $spec->{path} or die "path is required to get";

  # Split "some/dir/name" into an optional directory part and the leaf name.
  my ($dir, $file) = $spec->{path} =~ m{^(?:(.*)/)?([^/]+)$};

  # Directory to search: base_dir itself, or the requested sub-directory.
  my $search_dir = io->dir($self->base_dir);
  $search_dir = $search_dir->catdir($dir) if defined($dir);

  # The filter both selects "<leaf>.<ext>" entries and records which extension
  # matched, so the right extractor can be chosen afterwards.
  my $type;
  my $filtered = $search_dir->filter(sub {
    $_->filename =~ /^\Q${file}\E${\$self->_types_re}$/ and $type = $1
  });

  # A missing directory simply means "no candidates".
  my @found = -e "$filtered" ? $filtered->all_files : ();

  die "multiple files found for ${\$spec->{path}}:\n" . join("\n", @found)
    if @found > 1;
  return undef unless @found;

  my $page_path = $self->rel_path->catdir($spec->{path});
  return $self->_inflate($type, $page_path, $found[0]);
}

# Build the page for the source file $io, going through an on-disk JSON cache.
#
#   $type - extension captured via _types_re; selects the
#           _extract_from_<type> method used to parse the source
#   $path - path handed to the page constructor
#   $io   - file object for the source document (->name, ->mtime and ->all
#           are called on it)
#
# The cache lives next to the source as ".htcache.<filename>.json" and is used
# when its mtime is at least that of the source. A cache that cannot be read
# or does not decode to a hash (for example a truncated file left by an
# interrupted write) is treated as a miss and regenerated, instead of making
# the page impossible to load until the cache file is removed by hand.
#
# Dies with "Error inflating ..." when extraction, the cache write or page
# construction fails.
sub _inflate {
  my ($self, $type, $path, $io) = @_;
  (my $cache_name = $io->name) =~ s/\/([^\/]+)$/\/.htcache.$1.json/;
  my $cache = io($cache_name);
  if (-f $cache_name and $cache->mtime >= $io->mtime) {
    # try without catch: a read/decode failure just leaves $cached undefined.
    my $cached = try { $self->_json->decode($cache->all) };
    return $self->_new_page($path, $cached) if ref($cached) eq 'HASH';
  }
  my $raw = $io->all;
  return try {
    my $extracted = $self->${\"_extract_from_${type}"}($raw);
    # Write through a fresh handle: $cache may already have been read above.
    io($cache_name)->print($self->_json->encode($extracted));
    $self->_new_page($path, $extracted);
  } catch {
    die "Error inflating ${path} as ${type}: $_\n\nData was: ${raw}";
  };
}

# Apply $mapper to every page returned by flatten and return an array
# reference holding everything the callback returned. The page is available
# to the callback both as its first argument and as $_.
sub map {
  my ($self, $mapper) = @_;
  my @results;
  push @results, $mapper->($_) for $self->flatten;
  return \@results;
}

# Split $path, taken relative to the name of base_dir, into its directory
# components and return them.
# NOTE(review): the name suggests callers want a depth, presumably the number
# of components -- no caller is visible in this file, so confirm.
sub _depth_under_base {
  my ($self, $path) = @_;
  my $below_base = File::Spec->abs2rel($path, $self->base_dir->name);
  return File::Spec->splitdir($below_base);
}

# Return the pages found under base_dir, one per file whose name ends in an
# extension accepted by _types_re. Returns an empty list (undef in scalar
# context) when base_dir does not exist.
#
# The work happens in three stages:
#   1. pick the directories to start from, (min_depth - 1) levels below
#      base_dir;
#   2. list the matching files under each of them, passing
#      max_depth - (min_depth - 1) to all_files as the depth argument;
#   3. inflate each file, with its path (extension removed) made relative to
#      top_dir and re-rooted at '/'.
sub flatten {
  my ($self) = @_;
  my $base = $self->base_dir;
  return unless $base->exists;

  my $slash = io->dir('/');
  my $min   = $self->min_depth;

  # Stage 1: starting directories.
  my @roots = ($base);
  if ($min > 1) {
    # can't use ->all_dirs($min-1) since we only want the final level
    @roots = map $_->all_dirs, @roots for 1..$min-1;
  }

  # Stage 2: candidate files below each starting directory.
  my @files = map {
    $_->filter(sub { $_->filename =~ /${\$self->_types_re}$/ })
      ->all_files($self->max_depth - ($min-1))
  } @roots;

  # Stage 3: one page per file.
  return map {
    my ($path, $type) = $_->name =~ /^(.*)${\$self->_types_re}$/;
    $self->_inflate(
      $type,
      $slash->catdir(File::Spec->abs2rel($path, $self->top_dir->name)),
      $_
    );
  } @files;
}

# Return a new SCSite::LatestPageSet constructed with this page set as its
# parent and $max as its max_entries. The class is loaded on demand.
sub latest {
  my ($self, $max) = @_;
  require SCSite::LatestPageSet;
  my @ctor_args = (
    parent      => $self,
    max_entries => $max,
  );
  return SCSite::LatestPageSet->new(@ctor_args);
}

# Construct an SCSite::Page from a path and a hash reference of page data.
# The data is merged in last, so a 'path' or 'page_set' key inside it takes
# precedence over the values supplied here.
sub _new_page {
  my ($self, $path, $data) = @_;
  my %page_args = (
    path     => $path,
    page_set => $self,
    %{$data},
  );
  return SCSite::Page->new(\%page_args);
}

# Regex source (a plain string, not a compiled pattern) matching the file
# extensions this class handles; the capture group holds the type name.
sub _types_re {
  return '\.(html|md)';
}

# Pull the page fields out of an HTML document and return them as a hash
# reference with the keys title, subtitle, description, keywords, created and
# body. Every value falls back to '' when the corresponding element is
# missing or empty.
#
# title and body come from the content of <title> and <body>; the remaining
# fields come from the content attribute of <meta name="..."> elements.
sub _extract_from_html {
  my ($self, $html) = @_;

  my (@title, @body);
  my @meta_names  = qw(subtitle description keywords created);
  my %meta_events = map { $_ => [] } @meta_names;

  # Register the collectors in the same order as the result keys, then run
  # the whole transformation once.
  my $zoom = HTML::Zoom->from_html($html)
    ->select('title')->collect_content({ into => \@title });
  for my $name (@meta_names) {
    $zoom = $zoom->select("meta[name=${name}]")
      ->collect({ into => $meta_events{$name} });
  }
  $zoom->select('body')->collect_content({ into => \@body })->run;

  my %fields = (
    title => $title[0]->{raw}||'',
    body  => HTML::Zoom->from_events(\@body)->to_html||'',
  );
  for my $name (@meta_names) {
    $fields{$name} = $meta_events{$name}[0]->{attrs}{content}||'';
  }
  return \%fields;
}

# Extract page fields from Markdown source: render it with markdown() using
# document_format => 'complete', then reuse the HTML extractor on the result.
sub _extract_from_md {
  my ($self, $md) = @_;
  my %render_options = (document_format => 'complete');
  my ($html) = markdown($md, \%render_options);
  return $self->_extract_from_html($html);
}

1;
Commit	Line	Data
95148a72	1	package SCSite::PageSet;
95148a72	2
95148a72	3	use Text::MultiMarkdown 'markdown';
95148a72	4	use HTML::Zoom;
ebd4c292	5	use Sub::Quote;
	6	use Syntax::Keyword::Gather;
	7	use SCSite::Page;
d01538bc	8	use IO::All;
2a19b2a6	9	use Try::Tiny;
d01538bc	10	use JSON;
	11	use Moo;
	12
	13	{
	14	my $j = JSON->new;
	15	sub _json { $j }
	16	}
95148a72	17
34597fb2	18	has top_dir => (is => 'ro', lazy => 1, builder => 'base_dir');
95148a72	19	has base_dir => (is => 'ro', required => 1);
ebd4c292	20	has max_depth => (is => 'ro', default => quote_sub q{ 0 });
221c4151	21	has min_depth => (is => 'ro', default => quote_sub q{ 1 });
95148a72	22
34597fb2	23	has rel_path => (is => 'lazy');
	24
	25	sub _build_rel_path {
	26	my ($self) = @_;
	27	io->dir('/')
	28	->catdir(File::Spec->abs2rel($self->base_dir->name, $self->top_dir->name))
	29	}
	30
95148a72	31	sub get {
	32	my ($self, $spec) = @_;
	33	$spec->{path} or die "path is required to get";
	34	my ($dir, $file) = $spec->{path} =~ m{^(?:(.*)/)?([^/]+)$};
	35	my $type;
5cc6d9e2	36	my @poss = io->dir($self->base_dir)->${\sub {
95148a72	37	my $io = shift;
95148a72	38	defined($dir) ? $io->catdir($dir) : $io
ebd4c292	39	}}->filter(sub {
	40	$_->filename =~ /^\Q${file}\E${\$self->_types_re}$/ and $type = $1
	41	})
221c4151	42	->${\sub { -e "$_[0]" ? $_[0]->all_files : () }};
95148a72	43	die "multiple files found for ${\$spec->{path}}:\n".join "\n", @poss
95148a72	44	if @poss > 1;
ebd4c292	45	return undef unless @poss;
2a19b2a6	46	$self->_inflate(
d01538bc	47	$type, $self->rel_path->catdir($spec->{path}), $poss[0]
34597fb2	48	);
ebd4c292	49	}
ebd4c292	50
2a19b2a6	51	sub _inflate {
d01538bc	52	my ($self, $type, $path, $io) = @_;
	53	(my $cache_name = $io->name) =~ s/\/([^\/]+)$/\/.htcache.$1.json/;
	54	my $cache = io($cache_name);
	55	if (-f $cache_name) {
	56	if ($cache->mtime >= $io->mtime) {
	57	return $self->_new_page($path, $self->_json->decode($cache->all));
	58	}
	59	}
	60	my $raw = $io->all;
	61	try {
	62	my $extracted = $self->${\"_extract_from_${type}"}($raw);
	63	$cache->print($self->_json->encode($extracted));
	64	$self->_new_page($path, $extracted);
	65	} catch {
	66	die "Error inflating ${path} as ${type}: $_\n\nData was: ${raw}";
2a19b2a6	67	}
	68	}
	69
ebd4c292	70	sub map {
	71	my ($self, $mapper) = @_;
	72	[ map $mapper->($_), $self->flatten ]
	73	}
	74
fc436d2e	75	sub _depth_under_base {
	76	my ($self, $path) = @_;
	77	File::Spec->splitdir(File::Spec->abs2rel($path, $self->base_dir->name))
	78	}
	79
ebd4c292	80	sub flatten {
ebd4c292	81	my ($self) = @_;
fc436d2e	82	return unless (my $base = $self->base_dir)->exists;
ebd4c292	83	my %seen;
34597fb2	84	my $slash = io->dir('/');
fc436d2e	85	my $min = $self->min_depth;
ebd4c292	86	map {
ebd4c292	87	my ($path, $type) = $_->name =~ /^(.*)${\$self->_types_re}$/;
2a19b2a6	88	$self->_inflate(
2a19b2a6	89	$type,
fc436d2e	90	$slash->catdir(File::Spec->abs2rel($path, $self->top_dir->name)),
d01538bc	91	$_
ebd4c292	92	);
fc436d2e	93	} map {
fc436d2e	94	$_->filter(sub { $_->filename =~ /${\$self->_types_re}$/ })
221c4151	95	->all_files($self->max_depth - ($min-1))
fc436d2e	96	} map
221c4151	97	$min > 1
fc436d2e	98	? do {
221c4151	99	# can't use ->all_dirs($min-1) since we only want the final level
221c4151	100	my @x = ($_); @x = map $_->all_dirs, @x for 1..$min-1; @x
fc436d2e	101	}
fc436d2e	102	: $_,
221c4151	103	$base;
ebd4c292	104	}
	105
	106	sub latest {
	107	my ($self, $max) = @_;
	108	require SCSite::LatestPageSet;
	109	SCSite::LatestPageSet->new(
	110	parent => $self,
	111	max_entries => $max,
	112	);
95148a72	113	}
	114
	115	sub _new_page {
ebd4c292	116	SCSite::Page->new({ path => $_[1], page_set => $_[0], %{$_[2]} })
95148a72	117	}
95148a72	118
ebd4c292	119	sub _types_re { qw/\.(html\|md)/ }
ebd4c292	120
95148a72	121	sub _extract_from_html {
	122	my ($self, $html) = @_;
	123	HTML::Zoom->from_html($html)
	124	->select('title')->collect_content({ into => \my @title })
2a19b2a6	125	->select('meta[name=subtitle]')->collect({ into => \my @subtitle })
95148a72	126	->select('meta[name=description]')->collect({ into => \my @description })
95148a72	127	->select('meta[name=keywords]')->collect({ into => \my @keywords })
ebd4c292	128	->select('meta[name=created]')->collect({ into => \my @created })
95148a72	129	->select('body')->collect_content({ into => \my @body })
	130	->run;
	131	+{
ebd4c292	132	title => $title[0]->{raw}\|\|'',
2a19b2a6	133	subtitle => $subtitle[0]->{attrs}{content}\|\|'',
ebd4c292	134	description => $description[0]->{attrs}{content}\|\|'',
	135	keywords => $keywords[0]->{attrs}{content}\|\|'',
	136	created => $created[0]->{attrs}{content}\|\|'',
	137	body => HTML::Zoom->from_events(\@body)->to_html\|\|'',
95148a72	138	}
	139	}
	140
95148a72	141	sub _extract_from_md {
	142	my ($self, $md) = @_;
	143	$self->_extract_from_html(markdown($md, { document_format => 'complete' }));
	144	}
	145
	146	1;