ameliorate markup semantics and spelling
[gitmo/moose-website.git] / t / 02-spelling.t
CommitLineData
d45dd0ef 1#!perl
2use 5.010;
3use utf8;
4use strict;
5use warnings FATAL => 'all';
6use autodie qw(:all);
7use Capture::Tiny qw(capture);
8use Encode qw(decode_utf8);
9use File::Next qw();
10use File::Temp qw(tempfile);
11use File::Which qw(which);
12use Test::More;
13use XML::LibXML qw();
14use XML::LibXSLT qw();
15
# Make every Test::More output stream emit UTF-8, so that non-ASCII words in
# test names and diagnostics are written out encoded.
for my $stream (qw(output failure_output todo_output)) {
    binmode Test::More->builder->$stream, ':encoding(UTF-8)';
}

# Skip means sweep bugs under the rug.
# I want this test to be actually run.
if (!which('aspell')) {
    BAIL_OUT('aspell is not installed.');
}
22
# Words aspell must not report, read from the DATA section of this file.
# Lines that start with "#" and lines that are empty or whitespace-only are
# not stopwords and are left out.
my @stopwords;
while (defined(my $line = <DATA>)) {
    chomp $line;
    next if $line =~ /\A (?: \# | \s* \z)/msx; # skip comments, whitespace
    push @stopwords, $line;
}
28
# Directory that is searched for HTML files, taken from the "destdir" entry of
# the data structure returned by the runtime parameters file.
my $destdir;
{
    my $runtime_params_file = '_build/runtime_params';

    # A relative path without a leading "./" makes `do` search @INC, and the
    # current directory is no longer part of @INC since Perl 5.26.  Anchor the
    # path explicitly so the file is always loaded relative to the working
    # directory, on old and new perls alike.
    my $runtime_params = do "./$runtime_params_file";
    die "Could not load $runtime_params_file. Run Build.PL first.\n"
        unless $runtime_params;
    $destdir = $runtime_params->{destdir};
}
37
# Iterator over the files below $destdir, in sorted order, restricted to those
# whose name ends in ".html".
my %find_options = (
    file_filter => sub { /\.html \z/msx },
    sort_files  => 1,
);
my $iter = File::Next::files(\%find_options, $destdir);

# Number of HTML files processed; incremented once per file in the main loop
# and handed to done_testing at the end.
my $file_counter;
46
# Stylesheet applied to every page before spell-checking: it copies the
# document as is, except that abbr, acronym and code elements are dropped.
my $stylesheet_dom = XML::LibXML->load_xml(string => <<'END_OF_XSLT');
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xhtml="http://www.w3.org/1999/xhtml" version="1.0">
    <xsl:template match="xhtml:abbr"/>
    <xsl:template match="xhtml:acronym"/>
    <xsl:template match="xhtml:code"/> <!-- filter computerese -->
    <xsl:template match="@* | node()"> <!-- apply identity function to rest of nodes -->
        <xsl:copy>
            <xsl:apply-templates select="@* | node()"/>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>
END_OF_XSLT
my $stylesheet = XML::LibXSLT->new->parse_stylesheet($stylesheet_dom);

# Index the stopwords once, so that filtering aspell's output is a plain hash
# lookup.  This replaces the smartmatch operator, which triggers an
# experimental/deprecated warning on newer perls -- fatal in this file because
# of "use warnings FATAL => 'all'".
my %is_stopword = map { $_ => 1 } @stopwords;

# One test per HTML file: the file passes when aspell reports no word that is
# not listed as a stopword.
while (defined(my $html_file = $iter->())) {
    $file_counter++;

    # Run the page through the stylesheet and store the result in a temporary
    # file, which aspell then reads.
    my ($temp_handle, $temp_file) = tempfile;
    my $transformed = $stylesheet->transform(
        XML::LibXML->load_xml(location => $html_file, load_ext_dtd => 0),
    );
    $stylesheet->output_fh($transformed, $temp_handle);

    # aspell opens the file by name, so buffered output has to reach the disk
    # first; otherwise it may see an empty or truncated document.
    close $temp_handle;

    my ($stdout) = capture {
        system "aspell -H --encoding=UTF-8 -l en list < $temp_file";
    };

    # aspell prints one unknown word per line.
    my @misspelt_words = grep { !exists $is_stopword{$_} }
        split /\n/, decode_utf8 $stdout;
    ok !@misspelt_words, "$html_file ($temp_file) spell-check";
    diag join "\n", sort @misspelt_words if @misspelt_words;
}

done_testing($file_counter);
77
78__DATA__
79## personal names
e95a25e2 80Brocard
81Bunce's
82# Piers Cawley
83Cawley's
84Champoux
85chromatic
86# Sam Crawley
87Crawley
88cuny's
89Doran
90franck
91Grünauer
92hakobe's
93hanekomu
94Hengst's
95# Kanat-Alexander
96Kanat
97Kogman's
98Léon
99Napiorkowski
100Pearcey's
101Prather
102Prather's
103Ragwitz
104Rockway
105Rolsky's
106Stevan
107sunnavy's
108Treder's
109trombik
110Vecchi
111Vilain's
112Yanick
113Yuval
d45dd0ef 114
115## proper names
e95a25e2 116BizRate
117Cisco
118Cloudtone
119Endeworks
120GitHub
121Gource
122IMDb
123# Kansai.pm
124Kansai
125LinuxMag
126MedTouch
127MusicBrainz
128OCaml
129Omni
130OnLAMP
131PerlMonks
132Pobox
133Shadowcat
134Shopzilla
135SimplyClick
136Simula
137SocialText
138Symantec
139Takkle
140Tamarou
141TextMate
142ValueClick
d45dd0ef 143
144## Moose-specific
145
146## computerese
e95a25e2 147parameterized
d45dd0ef 148
149## other jargon
150
151## neologisms
e95a25e2 152blog
153podcast
d45dd0ef 154
155## compound
e95a25e2 156# post-mortem
157mortem
158# PDX.pm
159PDX
160# London.pm's
161pm's
d45dd0ef 162
163## slang
164
165## things that should be in the dictionary, but are not
e95a25e2 166Bioinformatics
167Committers
168refactoring
169Refactoring
d45dd0ef 170
171## single foreign words
172
173## misspelt on purpose