spelling
[gitmo/moose-website.git] / t / 02-spelling.t
CommitLineData
#!perl
use 5.010;
use utf8;
use strict;
use warnings FATAL => 'all';
use autodie qw(:all);
use Capture::Tiny qw(capture);
use Encode qw(decode_utf8);
use File::Next qw();
use File::Temp qw(tempfile);
use File::Which qw(which);
use Test::More;
use XML::LibXML qw();
use XML::LibXSLT qw();

# Spell-check every *.html file below the build's destination directory.
#
# For each page: strip the elements that legitimately contain non-words
# (abbr, acronym, code) with an XSLT identity transform, write the result
# to a temporary file, run aspell over it and fail the test for that page
# if aspell reports any word that is not listed after __DATA__.

binmode Test::More->builder->$_, ':encoding(UTF-8)'
    for qw(output failure_output todo_output);

# Skip means sweep bugs under the rug.
# I want this test to be actually run.
BAIL_OUT 'aspell is not installed.' unless which 'aspell';

# Accepted words, one per line after __DATA__.  A hash is used instead of
# a smartmatch against an array: smartmatch raises experimental/deprecated
# warnings on newer perls, which FATAL => 'all' would turn into a crash.
my %is_stopword;
while (defined(my $line = <DATA>)) {
    chomp $line;
    next if $line =~ /\A (?: \# | \s* \z)/msx;    # skip comments, whitespace
    $is_stopword{$line} = 1;
}

my $destdir;
{
    my $runtime_params_file = '_build/runtime_params';

    # The explicit "./" keeps `do FILE` from searching @INC, which no longer
    # contains the current directory on perl 5.26 and later.
    my $runtime_params = do "./$runtime_params_file";
    die "Could not load $runtime_params_file. Run Build.PL first.\n"
        unless $runtime_params;
    $destdir = $runtime_params->{destdir};
}

# Iterator over all HTML files below $destdir, in sorted order.
my $iter = File::Next::files(
    {
        file_filter => sub {/\.html \z/msx},
        sort_files  => 1,
    },
    $destdir
);

# Number of pages checked; doubles as the test plan passed to done_testing.
my $file_counter;

# The heredoc is terminated by the first empty line.
my $stylesheet = XML::LibXSLT->new->parse_stylesheet(
    XML::LibXML->load_xml(string => <<''));
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xhtml="http://www.w3.org/1999/xhtml" version="1.0">
    <xsl:template match="xhtml:abbr"/>
    <xsl:template match="xhtml:acronym"/>
    <xsl:template match="xhtml:code"/> <!-- filter computerese -->
    <xsl:template match="@* | node()"> <!-- apply identity function to rest of nodes -->
        <xsl:copy>
            <xsl:apply-templates select="@* | node()"/>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>

while (defined(my $html_file = $iter->())) {
    $file_counter++;

    my ($temp_handle, $temp_file) = tempfile;
    my $transformed = $stylesheet->transform(
        XML::LibXML->load_xml(location => $html_file, load_ext_dtd => 0));
    $stylesheet->output_fh($transformed, $temp_handle);

    # aspell reads the file by name, so make sure everything has reached
    # the disk before it is started (autodie checks the close).
    close $temp_handle;

    my ($stdout) = capture {
        system "aspell -H --encoding=UTF-8 -l en list < $temp_file";
    };
    my @misspelt_words
        = grep { !$is_stopword{$_} } split /\n/, decode_utf8($stdout);
    ok !@misspelt_words, "$html_file ($temp_file) spell-check";

    if (@misspelt_words) {
        # Keep the temporary file so that the failure can be inspected.
        diag join "\n", sort @misspelt_words;
    }
    else {
        # tempfile() in list context never removes the file on its own.
        unlink $temp_file;
    }
}

done_testing($file_counter);

__DATA__
## personal names
Brocard
Bunce's
# Pierce Cawley
Cawley's
Champoux
chromatic
# Sam Crawley
Crawley
cuny's
Doran
franck
Grünauer
hakobe's
hanekomu
Hengst's
# Kanat-Alexander
Kanat
Kogman's
Kuri's
Léon
Napiorkowski
Pearcey's
Prather
Prather's
Ragwitz
Rockway
Rodighiero
Rolsky's
Stevan
sunnavy's
Treder's
trombik
Vecchi
Vilain
Vilain's
Yanick
Yuval

## proper names
BizRate
Cisco
Cloudtone
DoctorBase
Endeworks
GitHub
Gource
IMDb
# Kansai.pm
Kansai
LinuxMag
MedTouch
MusicBrainz
OCaml
Omni
OnLAMP
PerlMonks
Pobox
Shadowcat
Shopzilla
SimplyClick
Simula
SocialText
Symantec
Takkle
Tamarou
TextMate
ValueClick

## Moose-specific

## computerese
parameterized

## other jargon

## neologisms
blog
podcast

## compound
# post-mortem
mortem
# PDX.pm
PDX
# London.pm's
pm's

## slang

## things that should be in the dictionary, but are not
Bioinformatics
Committers
refactoring
Refactoring

## single foreign words

## misspelt on purpose