Commit | Line | Data |
d45dd0ef |
1 | #!perl |
2 | use 5.010; |
3 | use utf8; |
4 | use strict; |
5 | use warnings FATAL => 'all'; |
6 | use autodie qw(:all); |
7 | use Capture::Tiny qw(capture); |
8 | use Encode qw(decode_utf8); |
9 | use File::Next qw(); |
10 | use File::Temp qw(tempfile); |
11 | use File::Which qw(which); |
12 | use Test::More; |
13 | use XML::LibXML qw(); |
14 | use XML::LibXSLT qw(); |
15 | |
# Switch each of the Test::More builder's three report streams to UTF-8 so
# that non-ASCII words in test names and diagnostics are encoded on output.
for my $stream (qw(output failure_output todo_output)) {
    binmode Test::More->builder->$stream, ':encoding(UTF-8)';
}

# Skipping would sweep bugs under the rug; this test is meant to actually
# run, so a missing aspell aborts the whole test run instead.
BAIL_OUT('aspell is not installed.') if !which('aspell');
22 | |
# Collect the accepted words listed after __DATA__, one per line.
# Lines that start with "#" and lines that are empty or whitespace-only
# are ignored.
my @stopwords;
while (defined(my $entry = <DATA>)) {
    chomp $entry;
    next if $entry =~ /\A (?: \# | \s* \z)/msx;    # skip comments, whitespace
    push @stopwords, $entry;
}
28 | |
# Directory whose HTML files are spell-checked; read from the 'destdir' key
# of the hash reference returned by the runtime parameters file.
my $destdir;
{
    # Presumably produced by Build.PL, going by the error message below.
    my $runtime_params_file = '_build/runtime_params';

    # The explicit "./" matters: for a relative path that does not start with
    # "./" or "../", `do` searches @INC, and "." is no longer in @INC as of
    # Perl 5.26.  Without the prefix the file is not found, and the warning
    # perl emits about it is fatal under `use warnings FATAL => 'all'`.
    my $runtime_params = do "./$runtime_params_file";
    die "Could not load $runtime_params_file. Run Build.PL first.\n"
        unless $runtime_params;
    $destdir = $runtime_params->{destdir};
}
37 | |
# Iterator over the files below $destdir whose names end in ".html",
# with sorting requested from File::Next.
my %finder_options = (
    file_filter => sub { /\.html \z/msx },
    sort_files  => 1,
);
my $iter = File::Next::files(\%finder_options, $destdir);
44 | |
# Number of HTML files processed; handed to done_testing as the plan.
my $file_counter;

# XSLT that drops xhtml:abbr, xhtml:acronym and xhtml:code elements and
# copies every other node and attribute through unchanged.
my $stylesheet_dom = XML::LibXML->load_xml(string => <<'END_XSLT');
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xhtml="http://www.w3.org/1999/xhtml" version="1.0">
<xsl:template match="xhtml:abbr"/>
<xsl:template match="xhtml:acronym"/>
<xsl:template match="xhtml:code"/> <!-- filter computerese -->
<xsl:template match="@* | node()"> <!-- apply identity function to rest of nodes -->
<xsl:copy>
<xsl:apply-templates select="@* | node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
END_XSLT
my $stylesheet = XML::LibXSLT->new->parse_stylesheet($stylesheet_dom);
# Index the stopwords once so membership is a plain hash lookup.  The
# smartmatch operator (~~) used previously triggers an "experimental"
# (later "deprecated") warning on newer perls, which is fatal under
# `use warnings FATAL => 'all'`.
my %is_stopword = map { $_ => 1 } @stopwords;

# One test per HTML file: transform it with the stylesheet, run the result
# through aspell, and fail if any reported word is not a stopword.
while (defined(my $html_file = $iter->())) {
    $file_counter++;

    # The transformed document goes to a temp file that is not removed here;
    # its name appears in the test description, presumably so a failing
    # document can be inspected afterwards.
    my ($temp_handle, $temp_file) = tempfile;
    my $transformed = $stylesheet->transform(
        XML::LibXML->load_xml(location => $html_file, load_ext_dtd => 0,)
    );
    $stylesheet->output_fh($transformed, $temp_handle);

    # aspell reads the file by name, so make sure everything has reached
    # disk instead of relying on an implicit flush before the fork.
    close $temp_handle;

    my ($stdout) = capture {
        system "aspell -H --encoding=UTF-8 -l en list < $temp_file";
    };

    # aspell prints one word per line.
    my @misspelt_words = grep { !$is_stopword{$_} } split /\n/, decode_utf8 $stdout;
    ok !@misspelt_words, "$html_file ($temp_file) spell-check";
    diag join "\n", sort @misspelt_words if @misspelt_words;
}

done_testing($file_counter);
77 | |
78 | __DATA__ |
79 | ## personal names |
e95a25e2 |
80 | Brocard |
81 | Bunce's |
82 | # Piers Cawley |
83 | Cawley's |
84 | Champoux |
85 | chromatic |
86 | # Sam Crawley |
87 | Crawley |
88 | cuny's |
89 | Doran |
90 | franck |
91 | Grünauer |
92 | hakobe's |
93 | hanekomu |
94 | Hengst's |
95 | # Kanat-Alexander |
96 | Kanat |
97 | Kogman's |
98 | Léon |
99 | Napiorkowski |
100 | Pearcey's |
101 | Prather |
102 | Prather's |
103 | Ragwitz |
104 | Rockway |
105 | Rolsky's |
106 | Stevan |
107 | sunnavy's |
108 | Treder's |
109 | trombik |
110 | Vecchi |
111 | Vilain's |
112 | Yanick |
113 | Yuval |
d45dd0ef |
114 | |
115 | ## proper names |
e95a25e2 |
116 | BizRate |
117 | Cisco |
118 | Cloudtone |
119 | Endeworks |
120 | GitHub |
121 | Gource |
122 | IMDb |
123 | # Kansai.pm |
124 | Kansai |
125 | LinuxMag |
126 | MedTouch |
127 | MusicBrainz |
128 | OCaml |
129 | Omni |
130 | OnLAMP |
131 | PerlMonks |
132 | Pobox |
133 | Shadowcat |
134 | Shopzilla |
135 | SimplyClick |
136 | Simula |
137 | SocialText |
138 | Symantec |
139 | Takkle |
140 | Tamarou |
141 | TextMate |
142 | ValueClick |
d45dd0ef |
143 | |
144 | ## Moose-specific |
145 | |
146 | ## computerese |
e95a25e2 |
147 | parameterized |
d45dd0ef |
148 | |
149 | ## other jargon |
150 | |
151 | ## neologisms |
e95a25e2 |
152 | blog |
153 | podcast |
d45dd0ef |
154 | |
155 | ## compound |
e95a25e2 |
156 | # post-mortem |
157 | mortem |
158 | # PDX.pm |
159 | PDX |
160 | # London.pm's |
161 | pm's |
d45dd0ef |
162 | |
163 | ## slang |
164 | |
165 | ## things that should be in the dictionary, but are not |
e95a25e2 |
166 | Bioinformatics |
167 | Committers |
168 | refactoring |
169 | Refactoring |
d45dd0ef |
170 | |
171 | ## single foreign words |
172 | |
173 | ## misspelt on purpose |