Commit | Line | Data |
d45dd0ef |
1 | #!perl |
2 | use 5.010; |
3 | use utf8; |
4 | use strict; |
5 | use warnings FATAL => 'all'; |
6 | use autodie qw(:all); |
7 | use Capture::Tiny qw(capture); |
49046f7e |
8 | use Encode qw(encode_utf8 decode_utf8); |
d45dd0ef |
9 | use File::Next qw(); |
10 | use File::Temp qw(tempfile); |
11 | use File::Which qw(which); |
49046f7e |
12 | use Module::Build qw(); |
d45dd0ef |
13 | use Test::More; |
14 | use XML::LibXML qw(); |
15 | use XML::LibXSLT qw(); |
49046f7e |
16 | use YAML::XS qw(Load); |
d45dd0ef |
17 | |
# Put every Test::More output channel into UTF-8 mode so that test names and
# diagnostics are written as UTF-8.
for my $channel (qw(output failure_output todo_output)) {
    binmode Test::More->builder->$channel, ':encoding(UTF-8)';
}
20 | |
# Skipping would only sweep bugs under the rug; this test is supposed to
# actually run, so a missing prerequisite aborts the whole test run.
which('aspell')
    or BAIL_OUT('aspell is not installed.');

# Grab the Module::Build object of the current build. If ->current throws,
# the eval yields a false value and the run is aborted.
my $build;
my $have_build_session = eval {
    $build = Module::Build->current;
    1;
};
BAIL_OUT('We are not in a Module::Build session. Run Build.PL first.')
    if !$have_build_session;
d45dd0ef |
28 | |
49046f7e |
# Locale under test, as recorded in the build notes.
my $locale = $build->notes('locale');

# Slurp the YAML document stored after __DATA__; it maps a locale name to
# its list of accepted words.
my $stopword_lists = do {
    local $/ = undef;
    Load(encode_utf8(<DATA>));
};

# Swap a loaded 'Infinity' for a fresh string literal. This drops the
# scalar's IV/NV slots, which otherwise make the later membership check on
# this list bomb out.
my @stopwords
    = map { $_ eq 'Infinity' ? 'Infinity' : $_ } @{ $stopword_lists->{$locale} };
39 | |
# Iterator over the files below the build's destination directory, limited
# to names ending in ".html" and delivered in sorted order.
my %walk_options = (
    sort_files  => 1,
    file_filter => sub { /\.html \z/msx },
);
my $iter = File::Next::files(\%walk_options, $build->destdir);
46 | |
# Number of HTML files checked; left undefined until the first file is seen.
my $file_counter;

# XSLT applied to each page before spell-checking. It drops XHTML elements
# whose xml:lang differs from the locale under test as well as abbr, acronym
# and code elements, and copies everything else unchanged.
my $xslt_source = <<"END_XSLT";
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xhtml="http://www.w3.org/1999/xhtml" version="1.0">
<xsl:template match="xhtml:*[\@xml:lang!='$locale']"/>
<xsl:template match="xhtml:abbr"/>
<xsl:template match="xhtml:acronym"/>
<xsl:template match="xhtml:code"/> <!-- filter computerese -->
<xsl:template match="\@* | node()"> <!-- apply identity function to rest of nodes -->
<xsl:copy>
<xsl:apply-templates select="\@* | node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
END_XSLT
my $stylesheet = XML::LibXSLT->new->parse_stylesheet(
    XML::LibXML->load_xml(string => $xslt_source),
);

# Spell-check every generated HTML page: filter the page through the
# stylesheet, run aspell over the result, and fail the page if aspell reports
# any word that is not in the stop-word list for the locale.

# Lookup table for the membership test. A hash lookup is a plain string
# comparison, so it needs no smartmatch: that operator warns as
# experimental/deprecated on newer perls, which is fatal under
# "use warnings FATAL => 'all'". The table is built once, outside the loop.
my %is_stopword = map { $_ => 1 } @stopwords;

while (defined(my $html_file = $iter->())) {
    $file_counter++;

    # The temp file is intentionally kept; its path is part of the test name
    # so that the text handed to aspell can be inspected after a failure.
    my ($temp_handle, $temp_file) = tempfile;
    my $transformed = $stylesheet->transform(
        XML::LibXML->load_xml(location => $html_file, load_ext_dtd => 0),
    );
    $stylesheet->output_fh($transformed, $temp_handle);

    # Close explicitly so the whole document is on disk before aspell reads
    # it, instead of relying on system()'s best-effort implicit flush.
    close $temp_handle;

    # aspell's "list" mode reads the document on stdin and prints one
    # unknown word per line; -H selects its HTML filter mode.
    my ($stdout) = capture {
        system "aspell -H --encoding=UTF-8 -l $locale list < $temp_file";
    };
    my @misspelt_words
        = grep { !$is_stopword{$_} } split /\n/, decode_utf8($stdout);
    ok !@misspelt_words, "$html_file ($temp_file) spell-check";
    diag join "\n", sort @misspelt_words if @misspelt_words;
}

# One test per file. With no files the counter is still undef, so no plan
# count is forced and Test::More reports that no tests were run.
done_testing($file_counter);
80 | |
81 | __DATA__ |
49046f7e |
82 | --- |
83 | en: |
d45dd0ef |
84 | ## personal names |
49046f7e |
85 | - Brocard |
86 | - Bunce's |
e95a25e2 |
87 | # Pierce Cawley |
49046f7e |
88 | - Cawley's |
89 | - Champoux |
90 | - chromatic |
e95a25e2 |
91 | # Sam Crawley |
49046f7e |
92 | - Crawley |
93 | - cuny's |
94 | - Doran |
95 | - franck |
96 | - Grünauer |
97 | - hakobe's |
98 | - hanekomu |
06ff6213 |
99 | - JT |
49046f7e |
100 | - Hengst's |
e95a25e2 |
101 | # Kanat-Alexander |
49046f7e |
102 | - Kanat |
06ff6213 |
103 | - Kogman |
49046f7e |
104 | - Kogman's |
105 | - Kuri's |
06ff6213 |
106 | - Lenz |
49046f7e |
107 | - Léon |
06ff6213 |
108 | - Luehrs's |
49046f7e |
109 | - Napiorkowski |
110 | - Pearcey's |
72a74816 |
111 | - Perek's |
49046f7e |
112 | - Prather |
113 | - Prather's |
114 | - Ragwitz |
115 | - Rockway |
116 | - Rodighiero |
117 | - Rolsky's |
72a74816 |
118 | - SawyerX's |
119 | - Shlomi |
49046f7e |
120 | - Stevan |
121 | - sunnavy's |
122 | - Treder's |
123 | - trombik |
124 | - Vecchi |
125 | - Vilain |
126 | - Vilain's |
127 | - Yanick |
72a74816 |
128 | - Ynon |
49046f7e |
129 | - Yuval |
d45dd0ef |
130 | |
131 | ## proper names |
06ff6213 |
132 | - AirG |
49046f7e |
133 | - BizRate |
06ff6213 |
134 | - CATH |
135 | - CLAB |
49046f7e |
136 | - Cisco |
137 | - Cloudtone |
138 | - DoctorBase |
139 | - Endeworks |
72a74816 |
140 | - FOSDEM |
49046f7e |
141 | - GitHub |
142 | - Gource |
143 | - IMDb |
e95a25e2 |
144 | # Kansai.pm |
49046f7e |
145 | - Kansai |
72a74816 |
146 | - Linkfluence |
49046f7e |
147 | - LinuxMag |
06ff6213 |
148 | - Matsch |
49046f7e |
149 | - MedTouch |
150 | - MusicBrainz |
151 | - OCaml |
152 | - Omni |
153 | - OnLAMP |
154 | - PerlMonks |
155 | - Pobox |
156 | - Shadowcat |
157 | - Shopzilla |
158 | - SimplyClick |
159 | - Simula |
160 | - SocialText |
161 | - Symantec |
162 | - Takkle |
163 | - Tamarou |
164 | - TextMate |
06ff6213 |
165 | - UNMC |
166 | - UNO |
49046f7e |
167 | - ValueClick |
06ff6213 |
168 | - WebGUI |
d45dd0ef |
169 | |
170 | ## Moose-specific |
171 | |
172 | ## computerese |
06ff6213 |
173 | - Parameterized |
49046f7e |
174 | - parameterized |
d45dd0ef |
175 | |
176 | ## other jargon |
177 | |
178 | ## neologisms |
06ff6213 |
179 | - Blog |
49046f7e |
180 | - blog |
181 | - podcast |
d45dd0ef |
182 | |
183 | ## compound |
e95a25e2 |
184 | # post-mortem |
49046f7e |
185 | - mortem |
e95a25e2 |
186 | # PDX.pm |
49046f7e |
187 | - PDX |
e95a25e2 |
188 | # London.pm's |
49046f7e |
189 | - pm's |
d45dd0ef |
190 | |
191 | ## slang |
192 | |
193 | ## things that should be in the dictionary, but are not |
49046f7e |
194 | - Bioinformatics |
195 | - Committers |
196 | - refactoring |
197 | - Refactoring |
d45dd0ef |
198 | |
199 | ## single foreign words |
200 | |
201 | ## misspelt on purpose |
49046f7e |
202 | |
203 | de: |
204 | |
205 | ## Personennamen |
206 | - Apocalypse |
06ff6213 |
207 | - Austin |
49046f7e |
208 | - Austins |
209 | - Barry |
210 | - Boones |
211 | - Brocard |
212 | - Bruno |
213 | - Bunce |
214 | - Cawleys |
215 | - Champoux |
216 | - Chris |
217 | - chromatic |
218 | - Cory |
219 | - Crawley |
220 | - cunys |
221 | - Dave |
222 | - Devin |
223 | - Doran |
224 | - Doug |
225 | - Drew |
72a74816 |
226 | - Fish |
49046f7e |
227 | - franck |
228 | - Grünauer |
229 | - hakobes |
230 | - hanekomu |
231 | - Hengsts |
232 | - Jay |
06ff6213 |
233 | - Jesse |
49046f7e |
234 | - Jonathan |
06ff6213 |
235 | - JT |
49046f7e |
236 | - Kanat |
06ff6213 |
237 | - Kogman |
49046f7e |
238 | - Kogmans |
239 | - Kuri |
240 | - Larry |
241 | - Léon |
06ff6213 |
242 | - Luehrs |
49046f7e |
243 | - Little |
244 | - Littles |
245 | - Marcel |
06ff6213 |
246 | - Moritz |
49046f7e |
247 | - McLaughlin |
248 | - Mike |
249 | - Napiorkowski |
250 | - Ovid |
251 | - Ovids |
252 | - Pearceys |
72a74816 |
253 | - Pereks |
49046f7e |
254 | - Piers |
255 | - Prather |
256 | - Prathers |
257 | - Ragwitz |
258 | - Randal |
259 | - Rockway |
260 | - Rodighiero |
261 | - Rolsky |
262 | - Rolskys |
72a74816 |
263 | - SawyerX |
49046f7e |
264 | - Schwartz |
265 | - Shawn |
72a74816 |
266 | - Shlomi |
06ff6213 |
267 | - Smith |
49046f7e |
268 | - Stefano |
269 | - Stephens |
270 | - Stevan |
06ff6213 |
271 | - Stuart |
49046f7e |
272 | - sunnavys |
273 | - Tomas |
274 | - Treders |
275 | - trombik |
276 | - Trout |
277 | - Vecchi |
278 | - Vilain |
279 | - Vilains |
280 | - Walsh |
281 | - Watsons |
282 | - Whitakers |
283 | - Yanick |
72a74816 |
284 | - Ynon |
49046f7e |
285 | - Yuval |
286 | |
287 | ## Eigennamen |
49046f7e |
288 | - Beijing |
49046f7e |
289 | - Catalyst |
49046f7e |
290 | - Doodle |
06ff6213 |
291 | - Expanse |
72a74816 |
292 | - FOSDEM |
49046f7e |
293 | - Frozen |
294 | - Git |
295 | - GitHub |
296 | - Gource |
49046f7e |
297 | - Houston |
49046f7e |
298 | - Infinity |
299 | - Interactive |
300 | - Kansai |
06ff6213 |
301 | - Lacuna |
49046f7e |
302 | - LinuxMag |
49046f7e |
303 | - Melbourne |
49046f7e |
304 | - Nashville |
305 | - Oasis |
306 | - OCaml |
49046f7e |
307 | - OnLAMP |
49046f7e |
308 | - Overflow |
309 | - PerlMonks |
310 | - Pittsburgh |
49046f7e |
311 | - Ruby |
49046f7e |
312 | - Simula |
49046f7e |
313 | - TextMate |
06ff6213 |
314 | - WebGUI |
49046f7e |
315 | |
316 | ## Moose-spezifisch |
317 | - Metaobjektprotokoll |
318 | - MOP |
319 | - Mouse |
320 | - parameterisierte |
321 | - Objektmetaprogrammierung |
322 | |
323 | ## Computerjargon |
324 | - Beispielcode |
325 | - Codewiederverwendung |
326 | |
327 | ## anderer Jargon |
328 | - Perlmonger |
d48226eb |
329 | - Podcast |
49046f7e |
330 | |
331 | ## Neologismen |
332 | |
333 | ## Verbundworte |
334 | # Duck-Typing |
335 | - Duck |
336 | - Typing |
337 | # Lese-Evaluierungs-Ausgabe-Schleife |
338 | - Evaluierungs |
339 | # 100%ig |
340 | - ig |
341 | # Perl.it |
342 | - it |
343 | # E-Lamp |
344 | - Lamp |
345 | # Meta-Moose |
346 | - Meta |
347 | # PDX.pm |
348 | - PDX |
349 | # Plug-In |
350 | - Plug |
351 | # diverse .pm |
352 | - pm |
353 | # RC-Dateien |
354 | - RC |
355 | |
356 | ## Umgangssprache |
357 | |
358 | ## nicht im Wörterbuch, aber sollte drin stehen |
06ff6213 |
359 | - Antiquierung |
49046f7e |
360 | - Blog |
361 | - Blogeintrag |
362 | - Blogeinträge |
06ff6213 |
363 | - Datenkapselung |
49046f7e |
364 | - Endbericht |
195ac982 |
365 | - Expertenschulung |
49046f7e |
366 | - Gemeinschaftsprojekt |
367 | - Gruppentreffen |
368 | - Hauptdistribution |
369 | - Hauptseite |
370 | - Hilfeangeboten |
371 | - nachzuschlagen |
372 | - Objektsystem |
373 | - Objektsystems |
374 | - Produktionseinsatz |
375 | - Proteinanalyse |
376 | - rollenbasierten |
377 | - Rollenzusammensetzung |
378 | - Schnellreferenzkarte |
379 | - Skriptoptionen |
72a74816 |
380 | - Vortragsfolien |
49046f7e |
381 | - Webansicht |
382 | |
383 | ## einzelne Fremdwörter |
384 | - Refactoring |
385 | - Repository |
386 | - Repositorys |
387 | |
388 | ## absichtlich falsch |