fix HTML
[gitmo/moose-website.git] / t / 02-spelling.t
CommitLineData
#!perl
use 5.010;
use utf8;
use strict;
use warnings FATAL => 'all';
use autodie qw(:all);
use Capture::Tiny qw(capture);
use Encode qw(encode_utf8 decode_utf8);
use File::Next qw();
use File::Temp qw(tempfile);
use File::Which qw(which);
use Module::Build qw();
use Test::More;
use XML::LibXML qw();
use XML::LibXSLT qw();
use YAML::XS qw(Load);

# Spell-check every generated *.html file below the build's destdir with
# aspell.  Words listed for the current locale in the YAML document after
# __DATA__ are accepted; any other word reported by aspell fails the test
# for that file.

# Misspelt words may contain non-ASCII characters, so every Test::More
# output channel has to encode to UTF-8.
binmode Test::More->builder->$_, ':encoding(UTF-8)'
    for qw(output failure_output todo_output);

# Skip means sweep bugs under the rug.
# I want this test to be actually run.
BAIL_OUT 'aspell is not installed.' unless which 'aspell';

my $build;
eval { $build = Module::Build->current; 1; }
    or BAIL_OUT 'We are not in a Module::Build session. Run Build.PL first.';

my $locale = $build->notes('locale');
BAIL_OUT q{The Module::Build session has no 'locale' note.}
    unless defined $locale;

# Stop words for the current locale, kept as hash keys.  Hash keys are always
# plain strings, so an entry such as "Infinity" that the YAML loader also
# flags as numeric cannot trigger a numeric comparison against a word.
my %is_stopword;
{
    local $/ = undef;    # slurp the whole DATA section at once
    # Load() wants UTF-8 octets, hence the encode of what <DATA> returned.
    my $yaml = Load(encode_utf8(<DATA>));
    # A locale without a word list simply has no stop words.
    %is_stopword = map { ("$_" => 1) } @{ $yaml->{$locale} // [] };
}

# Iterator over all *.html files below the destination directory, sorted so
# that the test order is stable between runs.
my $iter = File::Next::files(
    {
        file_filter => sub {/\.html \z/msx},
        sort_files  => 1,
    },
    $build->destdir
);

# Deliberately left undefined: done_testing(undef) behaves like a bare
# done_testing, whereas an explicit 0 could be emitted as a "1..0" plan.
my $file_counter;

# Identity transform that drops everything which must not be spell-checked:
# elements marked with a different xml:lang, abbreviations, acronyms and code.
my $stylesheet = XML::LibXSLT->new->parse_stylesheet(
    XML::LibXML->load_xml(string => <<"END_XSLT"));
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xhtml="http://www.w3.org/1999/xhtml" version="1.0">
    <xsl:template match="xhtml:*[\@xml:lang!='${locale}']"/>
    <xsl:template match="xhtml:abbr"/>
    <xsl:template match="xhtml:acronym"/>
    <xsl:template match="xhtml:code"/> <!-- filter computerese -->
    <xsl:template match="\@* | node()"> <!-- apply identity function to rest of nodes -->
        <xsl:copy>
            <xsl:apply-templates select="\@* | node()"/>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>
END_XSLT

while (defined(my $html_file = $iter->())) {
    $file_counter++;

    my ($temp_handle, $temp_file) = tempfile;
    my $transformed = $stylesheet->transform(
        XML::LibXML->load_xml(location => $html_file, load_ext_dtd => 0),
    );
    $stylesheet->output_fh($transformed, $temp_handle);
    # aspell reads the file by name through the shell, so anything still
    # sitting in this handle's buffer must reach the disk first.
    close $temp_handle;

    my ($stdout) = capture {
        system "aspell -H --encoding=UTF-8 -l $locale list < $temp_file";
    };

    # aspell prints one unknown word per line.
    my @misspelt_words = grep { !$is_stopword{$_} }
        split /\n/, decode_utf8 $stdout;
    ok !@misspelt_words, "$html_file ($temp_file) spell-check";
    diag join "\n", sort @misspelt_words if @misspelt_words;
}

done_testing($file_counter);

__DATA__
49046f7e 82---
83en:
d45dd0ef 84## personal names
49046f7e 85- Brocard
86- Bunce's
e95a25e2 87# Pierce Cawley
49046f7e 88- Cawley's
89- Champoux
90- chromatic
e95a25e2 91# Sam Crawley
49046f7e 92- Crawley
93- cuny's
94- Doran
95- franck
96- Grünauer
97- hakobe's
98- hanekomu
99- Hengst's
e95a25e2 100# Kanat-Alexander
49046f7e 101- Kanat
102- Kogman's
103- Kuri's
104- Léon
105- Napiorkowski
106- Pearcey's
107- Prather
108- Prather's
109- Ragwitz
110- Rockway
111- Rodighiero
112- Rolsky's
113- Stevan
114- sunnavy's
115- Treder's
116- trombik
117- Vecchi
118- Vilain
119- Vilain's
120- Yanick
121- Yuval
d45dd0ef 122
123## proper names
49046f7e 124- BizRate
125- Cisco
126- Cloudtone
127- DoctorBase
128- Endeworks
129- GitHub
130- Gource
131- IMDb
e95a25e2 132# Kansai.pm
49046f7e 133- Kansai
134- LinuxMag
135- MedTouch
136- MusicBrainz
137- OCaml
138- Omni
139- OnLAMP
140- PerlMonks
141- Pobox
142- Shadowcat
143- Shopzilla
144- SimplyClick
145- Simula
146- SocialText
147- Symantec
148- Takkle
149- Tamarou
150- TextMate
151- ValueClick
d45dd0ef 152
153## Moose-specific
154
155## computerese
49046f7e 156- parameterized
d45dd0ef 157
158## other jargon
159
160## neologisms
49046f7e 161- blog
162- podcast
d45dd0ef 163
164## compound
e95a25e2 165# post-mortem
49046f7e 166- mortem
e95a25e2 167# PDX.pm
49046f7e 168- PDX
e95a25e2 169# London.pm's
49046f7e 170- pm's
d45dd0ef 171
172## slang
173
174## things that should be in the dictionary, but are not
49046f7e 175- Bioinformatics
176- Committers
177- refactoring
178- Refactoring
d45dd0ef 179
180## single foreign words
181
182## misspelt on purpose
49046f7e 183
184de:
185
186## Personennamen
187- Apocalypse
188- Austins
189- Barry
190- Boones
191- Brocard
192- Bruno
193- Bunce
194- Cawleys
195- Champoux
196- Chris
197- chromatic
198- Cory
199- Crawley
200- cunys
201- Dave
202- Devin
203- Doran
204- Doug
205- Drew
206- franck
207- Grünauer
208- hakobes
209- hanekomu
210- Hengsts
211- Jay
212- Jonathan
213- Kanat
214- Kogmans
215- Kuri
216- Larry
217- Léon
218- Little
219- Littles
220- Marcel
221- McLaughlin
222- Mike
223- Napiorkowski
224- Ovid
225- Ovids
226- Pearceys
227- Piers
228- Prather
229- Prathers
230- Ragwitz
231- Randal
232- Rockway
233- Rodighiero
234- Rolsky
235- Rolskys
236- Schwartz
237- Shawn
238- Stefano
239- Stephens
240- Stevan
241- sunnavys
242- Tomas
243- Treders
244- trombik
245- Trout
246- Vecchi
247- Vilain
248- Vilains
249- Walsh
250- Watsons
251- Whitakers
252- Yanick
253- Yuval
254
255## Eigennamen
256- at
257- Advantage
258- Beijing
259- Best
260- Bioinformatics
261- BizRate
262- Capitol
263- Catalyst
264- Cisco
265- Cloudtone
266- Corporation
267- Current
268- DoctorBase
269- Doodle
270- Endeworks
271- Frozen
272- Git
273- GitHub
274- Gource
275- Group
276- Hearst
277- Houston
278- IMDb
279- Infinity
280- Interactive
281- Kansai
282- Lexy
283- LinuxMag
284- Magazines
285- MedTouch
286- Melbourne
287- MusicBrainz
288- Napster
289- Nashville
290- Oasis
291- OCaml
292- Omni
293- OnLAMP
294- Open
295- Overflow
296- PerlMonks
297- Pittsburgh
298- Pobox
299- Practical
300- Ruby
301- Shadowcat
302- Shopzilla
303- SimplyClick
304- Simula
305- SocialText
306- Studios
307- Symantec
308- Takkle
309- Tamarou
310- TextMate
311- The
312- University
313- ValueClick
314- Yahoo
315# net-a-porter.com
316- com
317- net
318- porter
319
320## Moose-spezifisch
321- Metaobjektprotokoll
322- MOP
323- Mouse
324- parameterisierte
325- Objektmetaprogrammierung
326
327## Computerjargon
328- Beispielcode
329- Codewiederverwendung
330
331## anderer Jargon
332- Perlmonger
d48226eb 333- Podcast
49046f7e 334
335## Neologismen
336
337## Verbundworte
338# Duck-Typing
339- Duck
340- Typing
341# Lese-Evaluierungs-Ausgabe-Schleife
342- Evaluierungs
343# 100%ig
344- ig
345# Perl.it
346- it
347# E-Lamp
348- Lamp
349# Meta-Moose
350- Meta
351# PDX.pm
352- PDX
353# Plug-In
354- Plug
355# diverse .pm
356- pm
357# RC-Dateien
358- RC
359
360## Umgangssprache
361
362## nicht im Wörterbuch, aber sollte drin stehen
363- Blog
364- Blogeintrag
365- Blogeinträge
366- Endbericht
195ac982 367- Expertenschulung
49046f7e 368- Gemeinschaftsprojekt
369- Gruppentreffen
370- Hauptdistribution
371- Hauptseite
372- Hilfeangeboten
373- nachzuschlagen
374- Objektsystem
375- Objektsystems
376- Produktionseinsatz
377- Proteinanalyse
378- rollenbasierten
379- Rollenzusammensetzung
380- Schnellreferenzkarte
381- Skriptoptionen
382- Webansicht
383
384## einzelne Fremdwörter
385- Refactoring
386- Repository
387- Repositorys
388
389## absichtlich falsch