Commit | Line | Data |
d45dd0ef |
1 | #!perl |
2 | use 5.010; |
3 | use utf8; |
4 | use strict; |
5 | use warnings FATAL => 'all'; |
6 | use autodie qw(:all); |
7 | use Capture::Tiny qw(capture); |
49046f7e |
8 | use Encode qw(encode_utf8 decode_utf8); |
d45dd0ef |
9 | use File::Next qw(); |
10 | use File::Temp qw(tempfile); |
11 | use File::Which qw(which); |
49046f7e |
12 | use Module::Build qw(); |
d45dd0ef |
13 | use Test::More; |
14 | use XML::LibXML qw(); |
15 | use XML::LibXSLT qw(); |
49046f7e |
16 | use YAML::XS qw(Load); |
d45dd0ef |
17 | |
# Switch every Test::More output stream to UTF-8 so that diagnostics holding
# non-ASCII words are emitted correctly encoded.
my $test_builder = Test::More->builder;
for my $stream (qw(output failure_output todo_output)) {
    binmode($test_builder->$stream, ':encoding(UTF-8)');
}

# Skipping would sweep bugs under the rug; this test is meant to actually
# run, so a missing aspell aborts the whole test run instead.
if (!which('aspell')) {
    BAIL_OUT 'aspell is not installed.';
}

# Fetch the current Module::Build object; any exception raised while doing
# so is treated as "not inside a build session".
my $build;
my $have_build_session = eval {
    $build = Module::Build->current;
    1;
};
BAIL_OUT 'We are not in a Module::Build session. Run Build.PL first.'
    unless $have_build_session;
d45dd0ef |
28 | |
49046f7e |
# Locale recorded in the build notes; it selects the stopword list below and
# is later passed to aspell and interpolated into the XSLT filter.
my $locale = $build->notes('locale');

# Slurp the YAML document stored after __DATA__ and keep the word list that
# belongs to the current locale.
my @stopwords = do {
    local $/ = undef;    # slurp mode: read all of DATA in one go
    my $words_by_locale = Load(encode_utf8(<DATA>));
    map {
        # Replace a loaded 'Infinity' with a plain string literal so that it
        # carries no numeric (IV/NV) value; a numeric value would make a
        # later smart-match against the list bomb out.
        $_ ne 'Infinity' ? $_ : 'Infinity'
    } @{ $words_by_locale->{$locale} };
};
39 | |
# Iterator over all *.html files below the build's destdir, in sorted order.
my %finder_options = (
    file_filter => sub { /\.html \z/msx },
    sort_files  => 1,
);
my $iter = File::Next::files(\%finder_options, $build->destdir);

# Number of HTML files checked; doubles as the test plan for done_testing.
my $file_counter;
48 | |
# XSLT filter applied to each page before spell-checking. The templates with
# an empty body drop XHTML elements whose xml:lang attribute differs from the
# current locale, as well as <abbr>, <acronym> and <code> elements; the last
# template copies everything else through unchanged.
my $xslt_source = <<"END_OF_XSLT";
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xhtml="http://www.w3.org/1999/xhtml" version="1.0">
<xsl:template match="xhtml:*[\@xml:lang!='$locale']"/>
<xsl:template match="xhtml:abbr"/>
<xsl:template match="xhtml:acronym"/>
<xsl:template match="xhtml:code"/> <!-- filter computerese -->
<xsl:template match="\@* | node()"> <!-- apply identity function to rest of nodes -->
<xsl:copy>
<xsl:apply-templates select="\@* | node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
END_OF_XSLT

my $stylesheet = XML::LibXSLT->new->parse_stylesheet(
    XML::LibXML->load_xml(string => $xslt_source),
);

# Stopword lookup table. A hash lookup replaces the former smart-match
# (`$word ~~ @stopwords`): smart-match warns as experimental/deprecated on
# newer perls, which is fatal under `use warnings FATAL => 'all'`, and the
# operator is removed in recent perl releases. Keys are compared as strings.
my %is_stopword = map { $_ => 1 } @stopwords;

# Spell-check every HTML file: filter it through the stylesheet into a
# temporary file, feed that file to aspell, and fail the test for the file
# if aspell reports any word that is not a known stopword.
while (defined(my $html_file = $iter->())) {
    $file_counter++;

    my ($temp_handle, $temp_file) = tempfile;
    my $document = XML::LibXML->load_xml(
        location     => $html_file,
        load_ext_dtd => 0,
    );
    my $transformed = $stylesheet->transform($document);
    $stylesheet->output_fh($transformed, $temp_handle);

    # Flush and close before aspell opens the file by name; otherwise output
    # still sitting in the handle's buffer would be missing from the file.
    close $temp_handle;

    # aspell's "list" mode prints one misspelt word per line on stdout.
    my ($stdout) = capture {
        system "aspell -H --encoding=UTF-8 -l $locale list < $temp_file";
    };
    my @misspelt_words =
        grep { !$is_stopword{$_} } split /\n/, decode_utf8($stdout);
    ok !@misspelt_words, "$html_file ($temp_file) spell-check";
    diag join "\n", sort @misspelt_words if @misspelt_words;
}

# One test per HTML file was run above.
done_testing($file_counter);
80 | |
81 | __DATA__ |
49046f7e |
82 | --- |
83 | en: |
d45dd0ef |
84 | ## personal names |
49046f7e |
85 | - Brocard |
86 | - Bunce's |
e95a25e2 |
87 | # Piers Cawley |
49046f7e |
88 | - Cawley's |
89 | - Champoux |
90 | - chromatic |
e95a25e2 |
91 | # Sam Crawley |
49046f7e |
92 | - Crawley |
93 | - cuny's |
94 | - Doran |
95 | - franck |
96 | - Grünauer |
97 | - hakobe's |
98 | - hanekomu |
99 | - Hengst's |
e95a25e2 |
100 | # Kanat-Alexander |
49046f7e |
101 | - Kanat |
102 | - Kogman's |
103 | - Kuri's |
104 | - Léon |
105 | - Napiorkowski |
106 | - Pearcey's |
107 | - Prather |
108 | - Prather's |
109 | - Ragwitz |
110 | - Rockway |
111 | - Rodighiero |
112 | - Rolsky's |
113 | - Stevan |
114 | - sunnavy's |
115 | - Treder's |
116 | - trombik |
117 | - Vecchi |
118 | - Vilain |
119 | - Vilain's |
120 | - Yanick |
121 | - Yuval |
d45dd0ef |
122 | |
123 | ## proper names |
49046f7e |
124 | - BizRate |
125 | - Cisco |
126 | - Cloudtone |
127 | - DoctorBase |
128 | - Endeworks |
129 | - GitHub |
130 | - Gource |
131 | - IMDb |
e95a25e2 |
132 | # Kansai.pm |
49046f7e |
133 | - Kansai |
134 | - LinuxMag |
135 | - MedTouch |
136 | - MusicBrainz |
137 | - OCaml |
138 | - Omni |
139 | - OnLAMP |
140 | - PerlMonks |
141 | - Pobox |
142 | - Shadowcat |
143 | - Shopzilla |
144 | - SimplyClick |
145 | - Simula |
146 | - SocialText |
147 | - Symantec |
148 | - Takkle |
149 | - Tamarou |
150 | - TextMate |
151 | - ValueClick |
d45dd0ef |
152 | |
153 | ## Moose-specific |
154 | |
155 | ## computerese |
49046f7e |
156 | - parameterized |
d45dd0ef |
157 | |
158 | ## other jargon |
159 | |
160 | ## neologisms |
49046f7e |
161 | - blog |
162 | - podcast |
d45dd0ef |
163 | |
164 | ## compound |
e95a25e2 |
165 | # post-mortem |
49046f7e |
166 | - mortem |
e95a25e2 |
167 | # PDX.pm |
49046f7e |
168 | - PDX |
e95a25e2 |
169 | # London.pm's |
49046f7e |
170 | - pm's |
d45dd0ef |
171 | |
172 | ## slang |
173 | |
174 | ## things that should be in the dictionary, but are not |
49046f7e |
175 | - Bioinformatics |
176 | - Committers |
177 | - refactoring |
178 | - Refactoring |
d45dd0ef |
179 | |
180 | ## single foreign words |
181 | |
182 | ## misspelt on purpose |
49046f7e |
183 | |
184 | de: |
185 | |
186 | ## Personennamen |
187 | - Apocalypse |
188 | - Austins |
189 | - Barry |
190 | - Boones |
191 | - Brocard |
192 | - Bruno |
193 | - Bunce |
194 | - Cawleys |
195 | - Champoux |
196 | - Chris |
197 | - chromatic |
198 | - Cory |
199 | - Crawley |
200 | - cunys |
201 | - Dave |
202 | - Devin |
203 | - Doran |
204 | - Doug |
205 | - Drew |
206 | - franck |
207 | - Grünauer |
208 | - hakobes |
209 | - hanekomu |
210 | - Hengsts |
211 | - Jay |
212 | - Jonathan |
213 | - Kanat |
214 | - Kogmans |
215 | - Kuri |
216 | - Larry |
217 | - Léon |
218 | - Little |
219 | - Littles |
220 | - Marcel |
221 | - McLaughlin |
222 | - Mike |
223 | - Napiorkowski |
224 | - Ovid |
225 | - Ovids |
226 | - Pearceys |
227 | - Piers |
228 | - Prather |
229 | - Prathers |
230 | - Ragwitz |
231 | - Randal |
232 | - Rockway |
233 | - Rodighiero |
234 | - Rolsky |
235 | - Rolskys |
236 | - Schwartz |
237 | - Shawn |
238 | - Stefano |
239 | - Stephens |
240 | - Stevan |
241 | - sunnavys |
242 | - Tomas |
243 | - Treders |
244 | - trombik |
245 | - Trout |
246 | - Vecchi |
247 | - Vilain |
248 | - Vilains |
249 | - Walsh |
250 | - Watsons |
251 | - Whitakers |
252 | - Yanick |
253 | - Yuval |
254 | |
255 | ## Eigennamen |
256 | - at |
257 | - Advantage |
258 | - Beijing |
259 | - Best |
260 | - Bioinformatics |
261 | - BizRate |
262 | - Capitol |
263 | - Catalyst |
264 | - Cisco |
265 | - Cloudtone |
266 | - Corporation |
267 | - Current |
268 | - DoctorBase |
269 | - Doodle |
270 | - Endeworks |
271 | - Frozen |
272 | - Git |
273 | - GitHub |
274 | - Gource |
275 | - Group |
276 | - Hearst |
277 | - Houston |
278 | - IMDb |
279 | - Infinity |
280 | - Interactive |
281 | - Kansai |
282 | - Lexy |
283 | - LinuxMag |
284 | - Magazines |
285 | - MedTouch |
286 | - Melbourne |
287 | - MusicBrainz |
288 | - Napster |
289 | - Nashville |
290 | - Oasis |
291 | - OCaml |
292 | - Omni |
293 | - OnLAMP |
294 | - Open |
295 | - Overflow |
296 | - PerlMonks |
297 | - Pittsburgh |
298 | - Pobox |
299 | - Practical |
300 | - Ruby |
301 | - Shadowcat |
302 | - Shopzilla |
303 | - SimplyClick |
304 | - Simula |
305 | - SocialText |
306 | - Studios |
307 | - Symantec |
308 | - Takkle |
309 | - Tamarou |
310 | - TextMate |
311 | - The |
312 | - University |
313 | - ValueClick |
314 | - Yahoo |
315 | # net-a-porter.com |
316 | - com |
317 | - net |
318 | - porter |
319 | |
320 | ## Moose-spezifisch |
321 | - Metaobjektprotokoll |
322 | - MOP |
323 | - Mouse |
324 | - parameterisierte |
325 | - Objektmetaprogrammierung |
326 | |
327 | ## Computerjargon |
328 | - Beispielcode |
329 | - Codewiederverwendung |
330 | |
331 | ## anderer Jargon |
332 | - Perlmonger |
d48226eb |
333 | - Podcast |
49046f7e |
334 | |
335 | ## Neologismen |
336 | |
337 | ## Verbundworte |
338 | # Duck-Typing |
339 | - Duck |
340 | - Typing |
341 | # Lese-Evaluierungs-Ausgabe-Schleife |
342 | - Evaluierungs |
343 | # 100%ig |
344 | - ig |
345 | # Perl.it |
346 | - it |
347 | # E-Lamp |
348 | - Lamp |
349 | # Meta-Moose |
350 | - Meta |
351 | # PDX.pm |
352 | - PDX |
353 | # Plug-In |
354 | - Plug |
355 | # diverse .pm |
356 | - pm |
357 | # RC-Dateien |
358 | - RC |
359 | |
360 | ## Umgangssprache |
361 | |
362 | ## nicht im Wörterbuch, aber sollte drin stehen |
363 | - Blog |
364 | - Blogeintrag |
365 | - Blogeinträge |
366 | - Endbericht |
195ac982 |
367 | - Expertenschulung |
49046f7e |
368 | - Gemeinschaftsprojekt |
369 | - Gruppentreffen |
370 | - Hauptdistribution |
371 | - Hauptseite |
372 | - Hilfeangeboten |
373 | - nachzuschlagen |
374 | - Objektsystem |
375 | - Objektsystems |
376 | - Produktionseinsatz |
377 | - Proteinanalyse |
378 | - rollenbasierten |
379 | - Rollenzusammensetzung |
380 | - Schnellreferenzkarte |
381 | - Skriptoptionen |
382 | - Webansicht |
383 | |
384 | ## einzelne Fremdwörter |
385 | - Refactoring |
386 | - Repository |
387 | - Repositorys |
388 | |
389 | ## absichtlich falsch |