Commit | Line | Data |
d45dd0ef |
1 | #!perl |
2 | use 5.010; |
3 | use utf8; |
4 | use strict; |
5 | use warnings FATAL => 'all'; |
6 | use autodie qw(:all); |
7 | use Capture::Tiny qw(capture); |
49046f7e |
8 | use Encode qw(encode_utf8 decode_utf8); |
d45dd0ef |
9 | use File::Next qw(); |
10 | use File::Temp qw(tempfile); |
11 | use File::Which qw(which); |
49046f7e |
12 | use Module::Build qw(); |
d45dd0ef |
13 | use Test::More; |
14 | use XML::LibXML qw(); |
15 | use XML::LibXSLT qw(); |
49046f7e |
16 | use YAML::XS qw(Load); |
d45dd0ef |
17 | |
# Switch every Test::Builder output stream to UTF-8 so that non-ASCII words
# in test names and diagnostics are emitted correctly.
for my $stream (qw(output failure_output todo_output)) {
    binmode(Test::More->builder->$stream, ':encoding(UTF-8)');
}

# Skip means sweep bugs under the rug.
# I want this test to be actually run.
if (!which('aspell')) {
    BAIL_OUT 'aspell is not installed.';
}

# The build object supplies the locale and the destination directory used
# further down; without a Module::Build session there is nothing to check.
my $build;
unless (eval { $build = Module::Build->current; 1; }) {
    BAIL_OUT 'We are not in a Module::Build session. Run Build.PL first.';
}
d45dd0ef |
28 | |
49046f7e |
# Locale under test, as recorded in the Module::Build notes.
my $locale = $build->notes('locale');

# Words aspell may report but which are acceptable for this locale.  The
# lists live as YAML in the DATA section, keyed by locale.
my @stopwords = do {
    local $/;    # slurp the whole DATA section in one read
    my $words_for = Load(encode_utf8(<DATA>));
    map {
        # Swap the loaded 'Infinity' entry for a plain string literal so the
        # scalar carries no numeric (IV/NV) slots; comparisons that dispatch
        # on number-versus-string, such as smart-match, otherwise bomb out.
        $_ eq 'Infinity' ? 'Infinity' : $_
    } @{ $words_for->{$locale} };
};
39 | |
# Iterator over the *.html files below the build's destination directory,
# delivered in sorted order.
my $iter = File::Next::files(
    {
        file_filter => sub { /\.html \z/msx },
        sort_files  => 1,
    },
    $build->destdir,
);

# Number of files checked; doubles as the test plan at the end.
my $file_counter;

# XSLT filter applied to every page before spell-checking.  It drops XHTML
# elements whose xml:lang differs from the locale under test, drops
# abbr/acronym/code elements, and copies everything else unchanged.
my $xslt_source = <<"END_XSLT";
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xhtml="http://www.w3.org/1999/xhtml" version="1.0">
<xsl:template match="xhtml:*[\@xml:lang!='$locale']"/>
<xsl:template match="xhtml:abbr"/>
<xsl:template match="xhtml:acronym"/>
<xsl:template match="xhtml:code"/> <!-- filter computerese -->
<xsl:template match="\@* | node()"> <!-- apply identity function to rest of nodes -->
<xsl:copy>
<xsl:apply-templates select="\@* | node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
END_XSLT

my $stylesheet = XML::LibXSLT->new->parse_stylesheet(
    XML::LibXML->load_xml(string => $xslt_source)
);
63 | |
# Lookup table of the accepted words.  A hash lookup is plain string
# equality and replaces the former smart-match (~~) against @stopwords:
# smart-match is experimental/deprecated, and under this file's
# "use warnings FATAL => 'all'" its warning is fatal on modern perls.
my %is_stopword = map { ($_ => 1) } @stopwords;

# Spell-check each HTML file: filter it through the stylesheet into a
# temporary file, let aspell list the words it does not know, and fail the
# test if any of them is not an accepted stopword.
while (defined(my $html_file = $iter->())) {
    $file_counter++;

    my ($temp_handle, $temp_file) = tempfile;
    my $transformed = $stylesheet->transform(
        XML::LibXML->load_xml(location => $html_file, load_ext_dtd => 0)
    );
    $stylesheet->output_fh($transformed, $temp_handle);

    # aspell reads the file by name through the shell redirection below, so
    # close the handle first instead of relying on implicit flushing.
    close $temp_handle;

    my ($stdout) = capture {
        system "aspell -H --encoding=UTF-8 -l $locale list < $temp_file";
    };

    # aspell prints one unknown word per line.
    my @misspelt_words
        = grep { !$is_stopword{$_} } split /\n/, decode_utf8($stdout);

    # The temp file name is part of the test name so the filtered text can
    # be inspected after a failure.
    ok !@misspelt_words, "$html_file ($temp_file) spell-check";
    diag join "\n", sort @misspelt_words if @misspelt_words;
}

# One test per file visited.
done_testing($file_counter);
80 | |
81 | __DATA__ |
49046f7e |
82 | --- |
83 | en: |
d45dd0ef |
84 | ## personal names |
49046f7e |
85 | - Brocard |
86 | - Bunce's |
e95a25e2 |
87 | # Piers Cawley |
49046f7e |
88 | - Cawley's |
89 | - Champoux |
90 | - chromatic |
e95a25e2 |
91 | # Sam Crawley |
49046f7e |
92 | - Crawley |
93 | - cuny's |
94 | - Doran |
95 | - franck |
96 | - Grünauer |
97 | - hakobe's |
98 | - hanekomu |
06ff6213 |
99 | - JT |
49046f7e |
100 | - Hengst's |
e95a25e2 |
101 | # Kanat-Alexander |
49046f7e |
102 | - Kanat |
06ff6213 |
103 | - Kogman |
49046f7e |
104 | - Kogman's |
105 | - Kuri's |
06ff6213 |
106 | - Lenz |
49046f7e |
107 | - Léon |
06ff6213 |
108 | - Luehrs's |
49046f7e |
109 | - Napiorkowski |
110 | - Pearcey's |
111 | - Prather |
112 | - Prather's |
113 | - Ragwitz |
114 | - Rockway |
115 | - Rodighiero |
116 | - Rolsky's |
117 | - Stevan |
118 | - sunnavy's |
119 | - Treder's |
120 | - trombik |
121 | - Vecchi |
122 | - Vilain |
123 | - Vilain's |
124 | - Yanick |
125 | - Yuval |
d45dd0ef |
126 | |
127 | ## proper names |
06ff6213 |
128 | - AirG |
49046f7e |
129 | - BizRate |
06ff6213 |
130 | - CATH |
131 | - CLAB |
49046f7e |
132 | - Cisco |
133 | - Cloudtone |
134 | - DoctorBase |
135 | - Endeworks |
136 | - GitHub |
137 | - Gource |
138 | - IMDb |
e95a25e2 |
139 | # Kansai.pm |
49046f7e |
140 | - Kansai |
141 | - LinuxMag |
06ff6213 |
142 | - Matsch |
49046f7e |
143 | - MedTouch |
144 | - MusicBrainz |
145 | - OCaml |
146 | - Omni |
147 | - OnLAMP |
148 | - PerlMonks |
149 | - Pobox |
150 | - Shadowcat |
151 | - Shopzilla |
152 | - SimplyClick |
153 | - Simula |
154 | - SocialText |
155 | - Symantec |
156 | - Takkle |
157 | - Tamarou |
158 | - TextMate |
06ff6213 |
159 | - UNMC |
160 | - UNO |
49046f7e |
161 | - ValueClick |
06ff6213 |
162 | - WebGUI |
d45dd0ef |
163 | |
164 | ## Moose-specific |
165 | |
166 | ## computerese |
06ff6213 |
167 | - Parameterized |
49046f7e |
168 | - parameterized |
d45dd0ef |
169 | |
170 | ## other jargon |
171 | |
172 | ## neologisms |
06ff6213 |
173 | - Blog |
49046f7e |
174 | - blog |
175 | - podcast |
d45dd0ef |
176 | |
177 | ## compound |
e95a25e2 |
178 | # post-mortem |
49046f7e |
179 | - mortem |
e95a25e2 |
180 | # PDX.pm |
49046f7e |
181 | - PDX |
e95a25e2 |
182 | # London.pm's |
49046f7e |
183 | - pm's |
d45dd0ef |
184 | |
185 | ## slang |
186 | |
187 | ## things that should be in the dictionary, but are not |
49046f7e |
188 | - Bioinformatics |
189 | - Committers |
190 | - refactoring |
191 | - Refactoring |
d45dd0ef |
192 | |
193 | ## single foreign words |
194 | |
195 | ## misspelt on purpose |
49046f7e |
196 | |
197 | de: |
198 | |
199 | ## Personennamen |
200 | - Apocalypse |
06ff6213 |
201 | - Austin |
49046f7e |
202 | - Austins |
203 | - Barry |
204 | - Boones |
205 | - Brocard |
206 | - Bruno |
207 | - Bunce |
208 | - Cawleys |
209 | - Champoux |
210 | - Chris |
211 | - chromatic |
212 | - Cory |
213 | - Crawley |
214 | - cunys |
215 | - Dave |
216 | - Devin |
217 | - Doran |
218 | - Doug |
219 | - Drew |
220 | - franck |
221 | - Grünauer |
222 | - hakobes |
223 | - hanekomu |
224 | - Hengsts |
225 | - Jay |
06ff6213 |
226 | - Jesse |
49046f7e |
227 | - Jonathan |
06ff6213 |
228 | - JT |
49046f7e |
229 | - Kanat |
06ff6213 |
230 | - Kogman |
49046f7e |
231 | - Kogmans |
232 | - Kuri |
233 | - Larry |
234 | - Léon |
06ff6213 |
235 | - Luehrs |
49046f7e |
236 | - Little |
237 | - Littles |
238 | - Marcel |
06ff6213 |
239 | - Moritz |
49046f7e |
240 | - McLaughlin |
241 | - Mike |
242 | - Napiorkowski |
243 | - Ovid |
244 | - Ovids |
245 | - Pearceys |
246 | - Piers |
247 | - Prather |
248 | - Prathers |
249 | - Ragwitz |
250 | - Randal |
251 | - Rockway |
252 | - Rodighiero |
253 | - Rolsky |
254 | - Rolskys |
255 | - Schwartz |
256 | - Shawn |
06ff6213 |
257 | - Smith |
49046f7e |
258 | - Stefano |
259 | - Stephens |
260 | - Stevan |
06ff6213 |
261 | - Stuart |
49046f7e |
262 | - sunnavys |
263 | - Tomas |
264 | - Treders |
265 | - trombik |
266 | - Trout |
267 | - Vecchi |
268 | - Vilain |
269 | - Vilains |
270 | - Walsh |
271 | - Watsons |
272 | - Whitakers |
273 | - Yanick |
274 | - Yuval |
275 | |
276 | ## Eigennamen |
49046f7e |
277 | - Beijing |
49046f7e |
278 | - Catalyst |
49046f7e |
279 | - Doodle |
06ff6213 |
280 | - Expanse |
49046f7e |
281 | - Frozen |
282 | - Git |
283 | - GitHub |
284 | - Gource |
49046f7e |
285 | - Houston |
49046f7e |
286 | - Infinity |
287 | - Interactive |
288 | - Kansai |
06ff6213 |
289 | - Lacuna |
49046f7e |
290 | - LinuxMag |
49046f7e |
291 | - Melbourne |
49046f7e |
292 | - Nashville |
293 | - Oasis |
294 | - OCaml |
49046f7e |
295 | - OnLAMP |
49046f7e |
296 | - Overflow |
297 | - PerlMonks |
298 | - Pittsburgh |
49046f7e |
299 | - Ruby |
49046f7e |
300 | - Simula |
49046f7e |
301 | - TextMate |
06ff6213 |
302 | - WebGUI |
49046f7e |
303 | |
304 | ## Moose-spezifisch |
305 | - Metaobjektprotokoll |
306 | - MOP |
307 | - Mouse |
308 | - parameterisierte |
309 | - Objektmetaprogrammierung |
310 | |
311 | ## Computerjargon |
312 | - Beispielcode |
313 | - Codewiederverwendung |
314 | |
315 | ## anderer Jargon |
316 | - Perlmonger |
d48226eb |
317 | - Podcast |
49046f7e |
318 | |
319 | ## Neologismen |
320 | |
321 | ## Verbundworte |
322 | # Duck-Typing |
323 | - Duck |
324 | - Typing |
325 | # Lese-Evaluierungs-Ausgabe-Schleife |
326 | - Evaluierungs |
327 | # 100%ig |
328 | - ig |
329 | # Perl.it |
330 | - it |
331 | # E-Lamp |
332 | - Lamp |
333 | # Meta-Moose |
334 | - Meta |
335 | # PDX.pm |
336 | - PDX |
337 | # Plug-In |
338 | - Plug |
339 | # diverse .pm |
340 | - pm |
341 | # RC-Dateien |
342 | - RC |
343 | |
344 | ## Umgangssprache |
345 | |
346 | ## nicht im Wörterbuch, aber sollte drin stehen |
06ff6213 |
347 | - Antiquierung |
49046f7e |
348 | - Blog |
349 | - Blogeintrag |
350 | - Blogeinträge |
06ff6213 |
351 | - Datenkapselung |
49046f7e |
352 | - Endbericht |
195ac982 |
353 | - Expertenschulung |
49046f7e |
354 | - Gemeinschaftsprojekt |
355 | - Gruppentreffen |
356 | - Hauptdistribution |
357 | - Hauptseite |
358 | - Hilfeangeboten |
359 | - nachzuschlagen |
360 | - Objektsystem |
361 | - Objektsystems |
362 | - Produktionseinsatz |
363 | - Proteinanalyse |
364 | - rollenbasierten |
365 | - Rollenzusammensetzung |
366 | - Schnellreferenzkarte |
367 | - Skriptoptionen |
368 | - Webansicht |
369 | |
370 | ## einzelne Fremdwörter |
371 | - Refactoring |
372 | - Repository |
373 | - Repositorys |
374 | |
375 | ## absichtlich falsch |