Commit | Line | Data |
d45dd0ef |
1 | #!perl |
2 | use 5.010; |
3 | use utf8; |
4 | use strict; |
5 | use warnings FATAL => 'all'; |
6 | use autodie qw(:all); |
7 | use Capture::Tiny qw(capture); |
8 | use Encode qw(decode_utf8); |
9 | use File::Next qw(); |
10 | use File::Temp qw(tempfile); |
11 | use File::Which qw(which); |
12 | use Test::More; |
13 | use XML::LibXML qw(); |
14 | use XML::LibXSLT qw(); |
15 | |
# Put a UTF-8 encoding layer on each of the Test::More builder's output handles.
for my $stream (qw(output failure_output todo_output)) {
    binmode Test::More->builder->$stream, ':encoding(UTF-8)';
}
18 | |
# Deliberately not a skip: skipping would sweep bugs under the rug,
# and this test is meant to actually be run.
which('aspell') or BAIL_OUT('aspell is not installed.');
22 | |
# Collect the stopwords listed after __DATA__, one per line.
# Read line by line instead of slurping the whole handle in list context.
my @stopwords;
while (my $line = <DATA>) {
    chomp $line;

    # skip comments, whitespace
    next if $line =~ /\A (?: \# | \s* \z)/msx;

    push @stopwords, $line;
}
28 | |
# Directory whose HTML files get spell-checked, taken from the "destdir"
# entry of the runtime parameters file.
my $destdir;
{
    # The leading "./" is required: "do FILE" looks up any other relative
    # path through @INC, and "." is no longer part of @INC since Perl 5.26.
    my $runtime_params_file = './_build/runtime_params';
    my $runtime_params = do $runtime_params_file;
    die "Could not load $runtime_params_file. Run Build.PL first.\n"
        unless $runtime_params;
    $destdir = $runtime_params->{destdir};
}
37 | |
# Iterator over the files below $destdir whose names end in ".html",
# visited in sorted order.
my $iter = do {
    my %walk_options = (
        file_filter => sub {/\.html \z/msx},
        sort_files  => 1,
    );
    File::Next::files(\%walk_options, $destdir);
};

# Number of files checked so far; handed to done_testing as the plan.
my $file_counter;
46 | |
# Identity transformation, except that abbr, acronym and code elements in the
# XHTML namespace are dropped together with their content.
my $stylesheet = do {
    my $xslt_source = XML::LibXML->load_xml(string => <<'END_OF_XSLT');
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xhtml="http://www.w3.org/1999/xhtml" version="1.0">
<xsl:template match="xhtml:abbr"/>
<xsl:template match="xhtml:acronym"/>
<xsl:template match="xhtml:code"/> <!-- filter computerese -->
<xsl:template match="@* | node()"> <!-- apply identity function to rest of nodes -->
<xsl:copy>
<xsl:apply-templates select="@* | node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
END_OF_XSLT
    XML::LibXSLT->new->parse_stylesheet($xslt_source);
};
# Index the stopwords once for exact string lookups.  This replaces the
# smartmatch operator (~~), whose experimental/deprecation warnings are fatal
# under "use warnings FATAL => 'all'" on newer perls.
my %is_stopword = map { $_ => 1 } @stopwords;

# Spell-check every HTML file: one test per file, which passes when aspell
# reports no word that is missing from the stopword list.
while (defined(my $html_file = $iter->())) {
    $file_counter++;

    # Run the page through the stylesheet and write the result to a
    # temporary file.  The file name is part of the test description below.
    my ($temp_handle, $temp_file) = tempfile;
    my $transformed = $stylesheet->transform(
        XML::LibXML->load_xml(location => $html_file, load_ext_dtd => 0),
    );
    $stylesheet->output_fh($transformed, $temp_handle);

    # Make sure the whole document is on disk before aspell reads it.
    close $temp_handle;

    # aspell prints one unknown word per line on standard output.
    my ($stdout) = capture {
        system "aspell -H --encoding=UTF-8 -l en list < $temp_file";
    };
    my @misspelt_words = grep { !$is_stopword{$_} } split /\n/, decode_utf8 $stdout;
    ok !@misspelt_words, "$html_file ($temp_file) spell-check";
    diag join "\n", sort @misspelt_words if @misspelt_words;
}

done_testing($file_counter);
77 | |
78 | __DATA__ |
79 | ## personal names |
e95a25e2 |
80 | Brocard |
81 | Bunce's |
82 | # Pierce Cawley |
83 | Cawley's |
84 | Champoux |
85 | chromatic |
86 | # Sam Crawley |
87 | Crawley |
88 | cuny's |
89 | Doran |
90 | franck |
91 | Grünauer |
92 | hakobe's |
93 | hanekomu |
94 | Hengst's |
95 | # Kanat-Alexander |
96 | Kanat |
97 | Kogman's |
10b2c9bf |
98 | Kuri's |
e95a25e2 |
99 | Léon |
100 | Napiorkowski |
101 | Pearcey's |
102 | Prather |
103 | Prather's |
104 | Ragwitz |
105 | Rockway |
10b2c9bf |
106 | Rodighiero |
e95a25e2 |
107 | Rolsky's |
108 | Stevan |
109 | sunnavy's |
110 | Treder's |
111 | trombik |
112 | Vecchi |
10b2c9bf |
113 | Vilain |
e95a25e2 |
114 | Vilain's |
115 | Yanick |
116 | Yuval |
d45dd0ef |
117 | |
118 | ## proper names |
e95a25e2 |
119 | BizRate |
120 | Cisco |
121 | Cloudtone |
10b2c9bf |
122 | DoctorBase |
e95a25e2 |
123 | Endeworks |
124 | GitHub |
125 | Gource |
126 | IMDb |
127 | # Kansai.pm |
128 | Kansai |
129 | LinuxMag |
130 | MedTouch |
131 | MusicBrainz |
132 | OCaml |
133 | Omni |
134 | OnLAMP |
135 | PerlMonks |
136 | Pobox |
137 | Shadowcat |
138 | Shopzilla |
139 | SimplyClick |
140 | Simula |
141 | SocialText |
142 | Symantec |
143 | Takkle |
144 | Tamarou |
145 | TextMate |
146 | ValueClick |
d45dd0ef |
147 | |
148 | ## Moose-specific |
149 | |
150 | ## computerese |
e95a25e2 |
151 | parameterized |
d45dd0ef |
152 | |
153 | ## other jargon |
154 | |
155 | ## neologisms |
e95a25e2 |
156 | blog |
157 | podcast |
d45dd0ef |
158 | |
159 | ## compound |
e95a25e2 |
160 | # post-mortem |
161 | mortem |
162 | # PDX.pm |
163 | PDX |
164 | # London.pm's |
165 | pm's |
d45dd0ef |
166 | |
167 | ## slang |
168 | |
169 | ## things that should be in the dictionary, but are not |
e95a25e2 |
170 | Bioinformatics |
171 | Committers |
172 | refactoring |
173 | Refactoring |
d45dd0ef |
174 | |
175 | ## single foreign words |
176 | |
177 | ## misspelt on purpose |