some new articles added
[gitmo/moose-website.git] / t / 02-spelling.t
CommitLineData
d45dd0ef 1#!perl
2use 5.010;
3use utf8;
4use strict;
5use warnings FATAL => 'all';
6use autodie qw(:all);
7use Capture::Tiny qw(capture);
49046f7e 8use Encode qw(encode_utf8 decode_utf8);
d45dd0ef 9use File::Next qw();
10use File::Temp qw(tempfile);
11use File::Which qw(which);
49046f7e 12use Module::Build qw();
d45dd0ef 13use Test::More;
14use XML::LibXML qw();
15use XML::LibXSLT qw();
49046f7e 16use YAML::XS qw(Load);
d45dd0ef 17
# Switch each of Test::Builder's output streams to UTF-8 encoding.
for my $stream (qw(output failure_output todo_output)) {
    binmode Test::More->builder->$stream, ':encoding(UTF-8)';
}

# A skip would only hide the problem; abort loudly so that this test
# really gets run.
if (!which('aspell')) {
    BAIL_OUT('aspell is not installed.');
}

# The current Module::Build object supplies the locale and the destdir
# used further down; without it nothing here can work.
my $build;
if (!eval { $build = Module::Build->current; 1 }) {
    BAIL_OUT('We are not in a Module::Build session. Run Build.PL first.');
}
d45dd0ef 28
# Locale under test, as recorded in the build notes.
my $locale = $build->notes('locale');

# Stop words for that locale, taken from the YAML document after __DATA__.
my @stopwords;
{
    # Slurp mode: read the whole DATA section in one go.
    local $/;

    # Load is handed the DATA text re-encoded as UTF-8 octets.
    my $words_by_locale = Load(encode_utf8(<DATA>));

    my @listed = @{ $words_by_locale->{$locale} };
    for my $word (@listed) {
        # Swap a loaded 'Infinity' for a fresh string literal so the scalar
        # carries no numeric (IV/NV) value into the later comparison of
        # aspell's output against the stop words.
        push @stopwords, $word eq 'Infinity' ? 'Infinity' : $word;
    }
}
39
# Iterator over the files below the build's destdir whose name ends in
# ".html", with File::Next's sort_files option switched on.
my $is_html = sub { return /\.html \z/msx };
my $iter    = File::Next::files(
    { sort_files => 1, file_filter => $is_html },
    $build->destdir,
);

# Number of files checked so far; handed to done_testing at the end.
my $file_counter;
48
# XSLT applied to every page before spell-checking. The templates with an
# empty body drop XHTML elements whose xml:lang differs from $locale as well
# as abbr, acronym and code elements; the last template copies every other
# attribute and node through unchanged.
my $xslt_source = <<"END_XSLT";
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 xmlns:xhtml="http://www.w3.org/1999/xhtml" version="1.0">
 <xsl:template match="xhtml:*[\@xml:lang!='$locale']"/>
 <xsl:template match="xhtml:abbr"/>
 <xsl:template match="xhtml:acronym"/>
 <xsl:template match="xhtml:code"/> <!-- filter computerese -->
 <xsl:template match="\@* | node()"> <!-- apply identity function to rest of nodes -->
 <xsl:copy>
 <xsl:apply-templates select="\@* | node()"/>
 </xsl:copy>
 </xsl:template>
</xsl:stylesheet>
END_XSLT

my $stylesheet = XML::LibXSLT->new->parse_stylesheet(
    XML::LibXML->load_xml(string => $xslt_source),
);

64while (defined(my $html_file = $iter->())) {
65 $file_counter++;
66
67 my ($temp_handle, $temp_file) = tempfile;
68 my $transformed = $stylesheet->transform(XML::LibXML->load_xml(location => $html_file, load_ext_dtd => 0,));
69 $stylesheet->output_fh($transformed, $temp_handle);
70
71 my ($stdout) = capture {
49046f7e 72 system "aspell -H --encoding=UTF-8 -l $locale list < $temp_file";
d45dd0ef 73 };
74 my @misspelt_words = grep {!($_ ~~ @stopwords)} split /\n/, decode_utf8 $stdout;
75 ok !@misspelt_words, "$html_file ($temp_file) spell-check";
76 diag join "\n", sort @misspelt_words if @misspelt_words;
77}
78
79done_testing($file_counter);
80
81__DATA__
49046f7e 82---
83en:
d45dd0ef 84## personal names
49046f7e 85- Brocard
86- Bunce's
# Piers Cawley
49046f7e 88- Cawley's
89- Champoux
90- chromatic
e95a25e2 91# Sam Crawley
49046f7e 92- Crawley
93- cuny's
94- Doran
95- franck
96- Grünauer
97- hakobe's
98- hanekomu
06ff6213 99- JT
49046f7e 100- Hengst's
e95a25e2 101# Kanat-Alexander
49046f7e 102- Kanat
06ff6213 103- Kogman
49046f7e 104- Kogman's
105- Kuri's
06ff6213 106- Lenz
49046f7e 107- Léon
06ff6213 108- Luehrs's
49046f7e 109- Napiorkowski
110- Pearcey's
72a74816 111- Perek's
49046f7e 112- Prather
113- Prather's
114- Ragwitz
115- Rockway
116- Rodighiero
117- Rolsky's
72a74816 118- SawyerX's
119- Shlomi
49046f7e 120- Stevan
121- sunnavy's
122- Treder's
123- trombik
124- Vecchi
125- Vilain
126- Vilain's
127- Yanick
72a74816 128- Ynon
49046f7e 129- Yuval
d45dd0ef 130
131## proper names
06ff6213 132- AirG
49046f7e 133- BizRate
06ff6213 134- CATH
135- CLAB
49046f7e 136- Cisco
137- Cloudtone
138- DoctorBase
139- Endeworks
72a74816 140- FOSDEM
49046f7e 141- GitHub
142- Gource
143- IMDb
e95a25e2 144# Kansai.pm
49046f7e 145- Kansai
72a74816 146- Linkfluence
49046f7e 147- LinuxMag
06ff6213 148- Matsch
49046f7e 149- MedTouch
150- MusicBrainz
151- OCaml
152- Omni
153- OnLAMP
154- PerlMonks
155- Pobox
156- Shadowcat
157- Shopzilla
158- SimplyClick
159- Simula
160- SocialText
161- Symantec
162- Takkle
163- Tamarou
164- TextMate
06ff6213 165- UNMC
166- UNO
49046f7e 167- ValueClick
06ff6213 168- WebGUI
d45dd0ef 169
170## Moose-specific
171
172## computerese
06ff6213 173- Parameterized
49046f7e 174- parameterized
d45dd0ef 175
176## other jargon
177
178## neologisms
06ff6213 179- Blog
49046f7e 180- blog
181- podcast
d45dd0ef 182
183## compound
e95a25e2 184# post-mortem
49046f7e 185- mortem
e95a25e2 186# PDX.pm
49046f7e 187- PDX
e95a25e2 188# London.pm's
49046f7e 189- pm's
d45dd0ef 190
191## slang
192
193## things that should be in the dictionary, but are not
49046f7e 194- Bioinformatics
195- Committers
196- refactoring
197- Refactoring
d45dd0ef 198
199## single foreign words
200
201## misspelt on purpose
49046f7e 202
203de:
204
205## Personennamen
206- Apocalypse
06ff6213 207- Austin
49046f7e 208- Austins
209- Barry
210- Boones
211- Brocard
212- Bruno
213- Bunce
214- Cawleys
215- Champoux
216- Chris
217- chromatic
218- Cory
219- Crawley
220- cunys
221- Dave
222- Devin
223- Doran
224- Doug
225- Drew
72a74816 226- Fish
49046f7e 227- franck
228- Grünauer
229- hakobes
230- hanekomu
231- Hengsts
232- Jay
06ff6213 233- Jesse
49046f7e 234- Jonathan
06ff6213 235- JT
49046f7e 236- Kanat
06ff6213 237- Kogman
49046f7e 238- Kogmans
239- Kuri
240- Larry
241- Léon
06ff6213 242- Luehrs
49046f7e 243- Little
244- Littles
245- Marcel
06ff6213 246- Moritz
49046f7e 247- McLaughlin
248- Mike
249- Napiorkowski
250- Ovid
251- Ovids
252- Pearceys
72a74816 253- Pereks
49046f7e 254- Piers
255- Prather
256- Prathers
257- Ragwitz
258- Randal
259- Rockway
260- Rodighiero
261- Rolsky
262- Rolskys
72a74816 263- SawyerX
49046f7e 264- Schwartz
265- Shawn
72a74816 266- Shlomi
06ff6213 267- Smith
49046f7e 268- Stefano
269- Stephens
270- Stevan
06ff6213 271- Stuart
49046f7e 272- sunnavys
273- Tomas
274- Treders
275- trombik
276- Trout
277- Vecchi
278- Vilain
279- Vilains
280- Walsh
281- Watsons
282- Whitakers
283- Yanick
72a74816 284- Ynon
49046f7e 285- Yuval
286
287## Eigennamen
49046f7e 288- Beijing
49046f7e 289- Catalyst
49046f7e 290- Doodle
06ff6213 291- Expanse
72a74816 292- FOSDEM
49046f7e 293- Frozen
294- Git
295- GitHub
296- Gource
49046f7e 297- Houston
49046f7e 298- Infinity
299- Interactive
300- Kansai
06ff6213 301- Lacuna
49046f7e 302- LinuxMag
49046f7e 303- Melbourne
49046f7e 304- Nashville
305- Oasis
306- OCaml
49046f7e 307- OnLAMP
49046f7e 308- Overflow
309- PerlMonks
310- Pittsburgh
49046f7e 311- Ruby
49046f7e 312- Simula
49046f7e 313- TextMate
06ff6213 314- WebGUI
49046f7e 315
316## Moose-spezifisch
317- Metaobjektprotokoll
318- MOP
319- Mouse
320- parameterisierte
321- Objektmetaprogrammierung
322
323## Computerjargon
324- Beispielcode
325- Codewiederverwendung
326
327## anderer Jargon
328- Perlmonger
d48226eb 329- Podcast
49046f7e 330
331## Neologismen
332
333## Verbundworte
334# Duck-Typing
335- Duck
336- Typing
337# Lese-Evaluierungs-Ausgabe-Schleife
338- Evaluierungs
339# 100%ig
340- ig
341# Perl.it
342- it
343# E-Lamp
344- Lamp
345# Meta-Moose
346- Meta
347# PDX.pm
348- PDX
349# Plug-In
350- Plug
351# diverse .pm
352- pm
353# RC-Dateien
354- RC
355
356## Umgangssprache
357
358## nicht im Wörterbuch, aber sollte drin stehen
06ff6213 359- Antiquierung
49046f7e 360- Blog
361- Blogeintrag
362- Blogeinträge
06ff6213 363- Datenkapselung
49046f7e 364- Endbericht
195ac982 365- Expertenschulung
49046f7e 366- Gemeinschaftsprojekt
367- Gruppentreffen
368- Hauptdistribution
369- Hauptseite
370- Hilfeangeboten
371- nachzuschlagen
372- Objektsystem
373- Objektsystems
374- Produktionseinsatz
375- Proteinanalyse
376- rollenbasierten
377- Rollenzusammensetzung
378- Schnellreferenzkarte
379- Skriptoptionen
72a74816 380- Vortragsfolien
49046f7e 381- Webansicht
382
383## einzelne Fremdwörter
384- Refactoring
385- Repository
386- Repositorys
387
388## absichtlich falsch