spelling
[gitmo/moose-website.git] / t / 02-spelling.t
CommitLineData
d45dd0ef 1#!perl
2use 5.010;
3use utf8;
4use strict;
5use warnings FATAL => 'all';
6use autodie qw(:all);
7use Capture::Tiny qw(capture);
49046f7e 8use Encode qw(encode_utf8 decode_utf8);
d45dd0ef 9use File::Next qw();
10use File::Temp qw(tempfile);
11use File::Which qw(which);
49046f7e 12use Module::Build qw();
d45dd0ef 13use Test::More;
14use XML::LibXML qw();
15use XML::LibXSLT qw();
49046f7e 16use YAML::XS qw(Load);
d45dd0ef 17
# Switch each of Test::More's output streams to UTF-8; the diagnostics
# printed further down can contain non-ASCII words.
for my $stream (qw(output failure_output todo_output)) {
    binmode(Test::More->builder->$stream(), ':encoding(UTF-8)');
}

# Skip means sweep bugs under the rug.
# I want this test to be actually run.
unless (which('aspell')) {
    BAIL_OUT('aspell is not installed.');
}

# The current Module::Build object; later code reads the build notes and the
# destination directory from it.
my $build;
my $in_build_session = eval {
    $build = Module::Build->current;
    1;
};
if (!$in_build_session) {
    BAIL_OUT('We are not in a Module::Build session. Run Build.PL first.');
}
d45dd0ef 28
# Locale under test, as recorded in the Module::Build notes. It selects both
# the stopword list below and the aspell dictionary.
my $locale = $build->notes('locale');
BAIL_OUT 'No locale is set in the Module::Build notes.'
    unless defined $locale;

# Words that aspell reports but which are accepted for this locale. They come
# from the YAML document after __DATA__, keyed by locale.
my @stopwords;
{
    local $/ = undef;    # slurp the whole DATA section in one read

    # Re-encode the slurped text to UTF-8 octets before handing it to Load().
    my $yaml = Load(encode_utf8(<DATA>));

    # A locale without an entry in DATA simply has no stopwords; fall back to
    # an empty list instead of dying on an undefined array reference.
    @stopwords = map {
        # Replace "Infinity" with a plain string literal so that it carries no
        # numeric value into later comparisons.
        $_ eq 'Infinity' ? 'Infinity' : $_
    } @{ $yaml->{$locale} // [] };
}
39
# Iterator over the files below the build's destination directory whose names
# end in ".html", visited in sorted order.
my %walk_options = (
    sort_files  => 1,
    file_filter => sub { return /\.html \z/msx },
);
my $iter = File::Next::files(\%walk_options, $build->destdir);

# Counts the HTML files processed; passed to done_testing() at the end.
# Deliberately left uninitialised until the first file is seen.
my $file_counter;
48
# XSLT that removes what should not be spell-checked: XHTML elements whose
# xml:lang differs from the locale under test, plus abbr, acronym and code
# elements. Everything else is copied through unchanged.
# The heredoc interpolates: \@ yields a literal "@" and $locale is filled in.
my $xslt_source = <<"END_XSLT";
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xhtml="http://www.w3.org/1999/xhtml" version="1.0">
    <xsl:template match="xhtml:*[\@xml:lang!='$locale']"/>
    <xsl:template match="xhtml:abbr"/>
    <xsl:template match="xhtml:acronym"/>
    <xsl:template match="xhtml:code"/> <!-- filter computerese -->
    <xsl:template match="\@* | node()"> <!-- apply identity function to rest of nodes -->
        <xsl:copy>
            <xsl:apply-templates select="\@* | node()"/>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>
END_XSLT

my $stylesheet = XML::LibXSLT->new->parse_stylesheet(
    XML::LibXML->load_xml(string => $xslt_source),
);
# Lookup table of accepted words, built once. This replaces a smartmatch
# against @stopwords: the smartmatch operator raises an "experimental" (later
# "deprecated") warning at compile time, which is fatal under
# "use warnings FATAL => 'all'", and it scanned the whole list for every word.
my %is_stopword = map { ($_ => 1) } @stopwords;

# One test per HTML file: the page passes when aspell reports no word that is
# missing from the stopword table.
while (defined(my $html_file = $iter->())) {
    $file_counter++;

    # Filter the page through the stylesheet and write the result to a
    # temporary file for aspell to read.
    my ($temp_handle, $temp_file) = tempfile;
    my $page = XML::LibXML->load_xml(
        location     => $html_file,
        load_ext_dtd => 0,
    );
    my $transformed = $stylesheet->transform($page);
    $stylesheet->output_fh($transformed, $temp_handle);

    # The shell is needed for the input redirection. aspell's output is
    # captured and treated as one reported word per line.
    my ($stdout) = capture {
        system "aspell -H --encoding=UTF-8 -l $locale list < $temp_file";
    };

    my @misspelt_words =
        grep { !$is_stopword{$_} } split /\n/, decode_utf8($stdout);

    # The temp file name is part of the test description so that a failing
    # page's filtered text can be located.
    ok(!@misspelt_words, "$html_file ($temp_file) spell-check");
    diag join "\n", sort @misspelt_words if @misspelt_words;
}

done_testing($file_counter);
80
81__DATA__
49046f7e 82---
83en:
d45dd0ef 84## personal names
49046f7e 85- Brocard
86- Bunce's
e95a25e2 87# Pierce Cawley
49046f7e 88- Cawley's
89- Champoux
90- chromatic
e95a25e2 91# Sam Crawley
49046f7e 92- Crawley
93- cuny's
94- Doran
95- franck
96- Grünauer
97- hakobe's
98- hanekomu
06ff6213 99- JT
49046f7e 100- Hengst's
e95a25e2 101# Kanat-Alexander
49046f7e 102- Kanat
06ff6213 103- Kogman
49046f7e 104- Kogman's
105- Kuri's
06ff6213 106- Lenz
49046f7e 107- Léon
06ff6213 108- Luehrs's
49046f7e 109- Napiorkowski
110- Pearcey's
111- Prather
112- Prather's
113- Ragwitz
114- Rockway
115- Rodighiero
116- Rolsky's
117- Stevan
118- sunnavy's
119- Treder's
120- trombik
121- Vecchi
122- Vilain
123- Vilain's
124- Yanick
125- Yuval
d45dd0ef 126
127## proper names
06ff6213 128- AirG
49046f7e 129- BizRate
06ff6213 130- CATH
131- CLAB
49046f7e 132- Cisco
133- Cloudtone
134- DoctorBase
135- Endeworks
136- GitHub
137- Gource
138- IMDb
e95a25e2 139# Kansai.pm
49046f7e 140- Kansai
141- LinuxMag
06ff6213 142- Matsch
49046f7e 143- MedTouch
144- MusicBrainz
145- OCaml
146- Omni
147- OnLAMP
148- PerlMonks
149- Pobox
150- Shadowcat
151- Shopzilla
152- SimplyClick
153- Simula
154- SocialText
155- Symantec
156- Takkle
157- Tamarou
158- TextMate
06ff6213 159- UNMC
160- UNO
49046f7e 161- ValueClick
06ff6213 162- WebGUI
d45dd0ef 163
164## Moose-specific
165
166## computerese
06ff6213 167- Parameterized
49046f7e 168- parameterized
d45dd0ef 169
170## other jargon
171
172## neologisms
06ff6213 173- Blog
49046f7e 174- blog
175- podcast
d45dd0ef 176
177## compound
e95a25e2 178# post-mortem
49046f7e 179- mortem
e95a25e2 180# PDX.pm
49046f7e 181- PDX
e95a25e2 182# London.pm's
49046f7e 183- pm's
d45dd0ef 184
185## slang
186
187## things that should be in the dictionary, but are not
49046f7e 188- Bioinformatics
189- Committers
190- refactoring
191- Refactoring
d45dd0ef 192
193## single foreign words
194
195## misspelt on purpose
49046f7e 196
197de:
198
199## Personennamen
200- Apocalypse
06ff6213 201- Austin
49046f7e 202- Austins
203- Barry
204- Boones
205- Brocard
206- Bruno
207- Bunce
208- Cawleys
209- Champoux
210- Chris
211- chromatic
212- Cory
213- Crawley
214- cunys
215- Dave
216- Devin
217- Doran
218- Doug
219- Drew
220- franck
221- Grünauer
222- hakobes
223- hanekomu
224- Hengsts
225- Jay
06ff6213 226- Jesse
49046f7e 227- Jonathan
06ff6213 228- JT
49046f7e 229- Kanat
06ff6213 230- Kogman
49046f7e 231- Kogmans
232- Kuri
233- Larry
234- Léon
06ff6213 235- Luehrs
49046f7e 236- Little
237- Littles
238- Marcel
06ff6213 239- Moritz
49046f7e 240- McLaughlin
241- Mike
242- Napiorkowski
243- Ovid
244- Ovids
245- Pearceys
246- Piers
247- Prather
248- Prathers
249- Ragwitz
250- Randal
251- Rockway
252- Rodighiero
253- Rolsky
254- Rolskys
255- Schwartz
256- Shawn
06ff6213 257- Smith
49046f7e 258- Stefano
259- Stephens
260- Stevan
06ff6213 261- Stuart
49046f7e 262- sunnavys
263- Tomas
264- Treders
265- trombik
266- Trout
267- Vecchi
268- Vilain
269- Vilains
270- Walsh
271- Watsons
272- Whitakers
273- Yanick
274- Yuval
275
276## Eigennamen
49046f7e 277- Beijing
49046f7e 278- Catalyst
49046f7e 279- Doodle
06ff6213 280- Expanse
49046f7e 281- Frozen
282- Git
283- GitHub
284- Gource
49046f7e 285- Houston
49046f7e 286- Infinity
287- Interactive
288- Kansai
06ff6213 289- Lacuna
49046f7e 290- LinuxMag
49046f7e 291- Melbourne
49046f7e 292- Nashville
293- Oasis
294- OCaml
49046f7e 295- OnLAMP
49046f7e 296- Overflow
297- PerlMonks
298- Pittsburgh
49046f7e 299- Ruby
49046f7e 300- Simula
49046f7e 301- TextMate
06ff6213 302- WebGUI
49046f7e 303
304## Moose-spezifisch
305- Metaobjektprotokoll
306- MOP
307- Mouse
308- parameterisierte
309- Objektmetaprogrammierung
310
311## Computerjargon
312- Beispielcode
313- Codewiederverwendung
314
315## anderer Jargon
316- Perlmonger
d48226eb 317- Podcast
49046f7e 318
319## Neologismen
320
321## Verbundworte
322# Duck-Typing
323- Duck
324- Typing
325# Lese-Evaluierungs-Ausgabe-Schleife
326- Evaluierungs
327# 100%ig
328- ig
329# Perl.it
330- it
331# E-Lamp
332- Lamp
333# Meta-Moose
334- Meta
335# PDX.pm
336- PDX
337# Plug-In
338- Plug
339# diverse .pm
340- pm
341# RC-Dateien
342- RC
343
344## Umgangssprache
345
346## nicht im Wörterbuch, aber sollte drin stehen
06ff6213 347- Antiquierung
49046f7e 348- Blog
349- Blogeintrag
350- Blogeinträge
06ff6213 351- Datenkapselung
49046f7e 352- Endbericht
195ac982 353- Expertenschulung
49046f7e 354- Gemeinschaftsprojekt
355- Gruppentreffen
356- Hauptdistribution
357- Hauptseite
358- Hilfeangeboten
359- nachzuschlagen
360- Objektsystem
361- Objektsystems
362- Produktionseinsatz
363- Proteinanalyse
364- rollenbasierten
365- Rollenzusammensetzung
366- Schnellreferenzkarte
367- Skriptoptionen
368- Webansicht
369
370## einzelne Fremdwörter
371- Refactoring
372- Repository
373- Repositorys
374
375## absichtlich falsch