update tests for l10n
Lars Dɪᴇᴄᴋᴏᴡ 迪拉斯 [Wed, 14 Apr 2010 17:38:23 +0000 (19:38 +0200)]
Build.PL
inc/Local/Moose/Website/Build.pm
t/01-validate.t
t/02-spelling.t

index 82dce12..0c90b2c 100644 (file)
--- a/Build.PL
+++ b/Build.PL
@@ -15,12 +15,13 @@ my $self = Local::Moose::Website::Build->new(
         'Test::More'    => 0,
         'XML::LibXML'   => 0,
         'XML::LibXSLT'  => 0,
+        'YAML::XS'      => 0,
     },
     requires => {
         'Locale::Maketext::Lexicon'  => 0,
         Moose                        => 0,
-        'MooseX::Params::Validate'   => 0, 
-        'MooseX::Role::Parameterized'=> 0, 
+        'MooseX::Params::Validate'   => 0,
+        'MooseX::Role::Parameterized'=> 0,
         'MooseX::Types::Path::Class' => 0,
         'Path::Class'                => 0,
         'Resource::Pack'             => 0,   # http://github.com/stevan/Resource-Pack
@@ -28,8 +29,8 @@ my $self = Local::Moose::Website::Build->new(
         'YAML::XS'                   => 0,
     },
 );
-die "perldoc Build.PL\n" unless $self->destdir;
-
+die "perldoc Build.PL\n" unless $self->destdir && $self->args('locale');
+$self->notes('locale' => $self->args('locale'));
 $self->create_build_script;
 
 __END__
@@ -38,6 +39,6 @@ __END__
 
 =head1 SYNOPSIS
 
-    perl Build.PL --destdir=out
+    perl Build.PL --destdir=out --locale=en
     ./Build test
     ./Build distclean
index a9a1448..0509efb 100644 (file)
@@ -6,7 +6,8 @@ sub ACTION_code {
     my ($self) = @_;
     my $out = $self->destdir;
     make_path($out);
-    system $^X, 'bin/build_site.pl', '--outdir', $out;
+    system $^X, 'bin/build_site.pl', '--outdir', $out,
+      '--locale', $self->notes('locale');
     $self->add_to_cleanup($out);
     $self->depends_on('config_data');
     return;
index 41ade21..a1a9a67 100644 (file)
@@ -6,6 +6,7 @@ use warnings FATAL => 'all';
 use Capture::Tiny qw(capture);
 use File::Next qw();
 use File::Which qw(which);
+use Module::Build qw();
 use Test::More;
 
 # Skip means sweep bugs under the rug.
@@ -13,20 +14,15 @@ use Test::More;
 BAIL_OUT 'xmllint (part of the libxml2 package) not installed.'
   unless which 'xmllint';
 
-my $destdir;
-{
-    my $runtime_params_file = '_build/runtime_params';
-    my $runtime_params      = do $runtime_params_file;
-    die "Could not load $runtime_params_file. Run Build.PL first.\n"
-      unless $runtime_params;
-    $destdir = $runtime_params->{destdir};
-}
+my $build;
+eval { $build = Module::Build->current; 1; }
+  or BAIL_OUT 'We are not in a Module::Build session. Run Build.PL first.';
 
 my $iter = File::Next::files({
         file_filter => sub {/\.html \z/msx},
         sort_files  => 1,
     },
-    $destdir
+    $build->destdir
 );
 
 my $file_counter;
index d8f89f3..30e053b 100644 (file)
@@ -5,13 +5,15 @@ use strict;
 use warnings FATAL => 'all';
 use autodie qw(:all);
 use Capture::Tiny qw(capture);
-use Encode qw(decode_utf8);
+use Encode qw(encode_utf8 decode_utf8);
 use File::Next qw();
 use File::Temp qw(tempfile);
 use File::Which qw(which);
+use Module::Build qw();
 use Test::More;
 use XML::LibXML qw();
 use XML::LibXSLT qw();
+use YAML::XS qw(Load);
 
 binmode Test::More->builder->$_, ':encoding(UTF-8)'
     for qw(output failure_output todo_output);
@@ -20,40 +22,41 @@ binmode Test::More->builder->$_, ':encoding(UTF-8)'
 # I want this test to be actually run.
 BAIL_OUT 'aspell is not installed.' unless which 'aspell';
 
-my @stopwords;
-for (<DATA>) {
-    chomp;
-    push @stopwords, $_ unless /\A (?: \# | \s* \z)/msx;    # skip comments, whitespace
-}
+my $build;
+eval { $build = Module::Build->current; 1; }
+  or BAIL_OUT 'We are not in a Module::Build session. Run Build.PL first.';
 
-my $destdir;
+my $locale = $build->notes('locale');
+my @stopwords;
 {
-    my $runtime_params_file = '_build/runtime_params';
-    my $runtime_params      = do $runtime_params_file;
-    die "Could not load $runtime_params_file. Run Build.PL first.\n"
-      unless $runtime_params;
-    $destdir = $runtime_params->{destdir};
+    local $/ = undef;
+    my $yaml = Load(encode_utf8(<DATA>));
+    @stopwords = map {
+        # swap 'Infinity' for a fresh string literal: the loaded scalar also carries IV/NV (numeric) values, or else the smart-match later will bomb out
+        $_ eq 'Infinity' ? 'Infinity' : $_
+    } @{ $yaml->{$locale} };
 }
 
 my $iter = File::Next::files({
         file_filter => sub {/\.html \z/msx},
         sort_files  => 1,
     },
-    $destdir
+    $build->destdir
 );
 
 my $file_counter;
 
 my $stylesheet = XML::LibXSLT->new->parse_stylesheet(
-    XML::LibXML->load_xml(string => <<''));
+    XML::LibXML->load_xml(string => <<""));
 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
     xmlns:xhtml="http://www.w3.org/1999/xhtml" version="1.0">
+    <xsl:template match="xhtml:*[\@xml:lang!='$locale']"/>
     <xsl:template match="xhtml:abbr"/>
     <xsl:template match="xhtml:acronym"/>
     <xsl:template match="xhtml:code"/> <!-- filter computerese -->
-    <xsl:template match="@* | node()"> <!-- apply identity function to rest of nodes -->
+    <xsl:template match="\@* | node()"> <!-- apply identity function to rest of nodes -->
         <xsl:copy>
-            <xsl:apply-templates select="@* | node()"/>
+            <xsl:apply-templates select="\@* | node()"/>
         </xsl:copy>
     </xsl:template>
 </xsl:stylesheet>
@@ -66,7 +69,7 @@ while (defined(my $html_file = $iter->())) {
     $stylesheet->output_fh($transformed, $temp_handle);
 
     my ($stdout) = capture {
-        system "aspell -H --encoding=UTF-8 -l en list < $temp_file";
+        system "aspell -H --encoding=UTF-8 -l $locale list < $temp_file";
     };
     my @misspelt_words = grep {!($_ ~~ @stopwords)} split /\n/, decode_utf8 $stdout;
     ok !@misspelt_words, "$html_file ($temp_file) spell-check";
@@ -76,102 +79,310 @@ while (defined(my $html_file = $iter->())) {
 done_testing($file_counter);
 
 __DATA__
+---
+en:
 ## personal names
-Brocard
-Bunce's
+- Brocard
+- Bunce's
 # Pierce Cawley
-Cawley's
-Champoux
-chromatic
+- Cawley's
+- Champoux
+- chromatic
 # Sam Crawley
-Crawley
-cuny's
-Doran
-franck
-Grünauer
-hakobe's
-hanekomu
-Hengst's
+- Crawley
+- cuny's
+- Doran
+- franck
+- Grünauer
+- hakobe's
+- hanekomu
+- Hengst's
 # Kanat-Alexander
-Kanat
-Kogman's
-Kuri's
-Léon
-Napiorkowski
-Pearcey's
-Prather
-Prather's
-Ragwitz
-Rockway
-Rodighiero
-Rolsky's
-Stevan
-sunnavy's
-Treder's
-trombik
-Vecchi
-Vilain
-Vilain's
-Yanick
-Yuval
+- Kanat
+- Kogman's
+- Kuri's
+- Léon
+- Napiorkowski
+- Pearcey's
+- Prather
+- Prather's
+- Ragwitz
+- Rockway
+- Rodighiero
+- Rolsky's
+- Stevan
+- sunnavy's
+- Treder's
+- trombik
+- Vecchi
+- Vilain
+- Vilain's
+- Yanick
+- Yuval
 
 ## proper names
-BizRate
-Cisco
-Cloudtone
-DoctorBase
-Endeworks
-GitHub
-Gource
-IMDb
+- BizRate
+- Cisco
+- Cloudtone
+- DoctorBase
+- Endeworks
+- GitHub
+- Gource
+- IMDb
 # Kansai.pm
-Kansai
-LinuxMag
-MedTouch
-MusicBrainz
-OCaml
-Omni
-OnLAMP
-PerlMonks
-Pobox
-Shadowcat
-Shopzilla
-SimplyClick
-Simula
-SocialText
-Symantec
-Takkle
-Tamarou
-TextMate
-ValueClick
+- Kansai
+- LinuxMag
+- MedTouch
+- MusicBrainz
+- OCaml
+- Omni
+- OnLAMP
+- PerlMonks
+- Pobox
+- Shadowcat
+- Shopzilla
+- SimplyClick
+- Simula
+- SocialText
+- Symantec
+- Takkle
+- Tamarou
+- TextMate
+- ValueClick
 
 ## Moose-specific
 
 ## computerese
-parameterized
+- parameterized
 
 ## other jargon
 
 ## neologisms
-blog
-podcast
+- blog
+- podcast
 
 ## compound
 # post-mortem
-mortem
+- mortem
 # PDX.pm
-PDX
+- PDX
 # London.pm's
-pm's
+- pm's
 
 ## slang
 
 ## things that should be in the dictionary, but are not
-Bioinformatics
-Committers
-refactoring
-Refactoring
+- Bioinformatics
+- Committers
+- refactoring
+- Refactoring
 
 ## single foreign words
 
 ## misspelt on purpose
+
+de:
+
+## Personennamen
+- Apocalypse
+- Austins
+- Barry
+- Boones
+- Brocard
+- Bruno
+- Bunce
+- Cawleys
+- Champoux
+- Chris
+- chromatic
+- Cory
+- Crawley
+- cunys
+- Dave
+- Devin
+- Doran
+- Doug
+- Drew
+- franck
+- Grünauer
+- hakobes
+- hanekomu
+- Hengsts
+- Jay
+- Jonathan
+- Kanat
+- Kogmans
+- Kuri
+- Larry
+- Léon
+- Little
+- Littles
+- Marcel
+- McLaughlin
+- Mike
+- Napiorkowski
+- Ovid
+- Ovids
+- Pearceys
+- Piers
+- Prather
+- Prathers
+- Ragwitz
+- Randal
+- Rockway
+- Rodighiero
+- Rolsky
+- Rolskys
+- Schwartz
+- Shawn
+- Stefano
+- Stephens
+- Stevan
+- sunnavys
+- Tomas
+- Treders
+- trombik
+- Trout
+- Vecchi
+- Vilain
+- Vilains
+- Walsh
+- Watsons
+- Whitakers
+- Yanick
+- Yuval
+
+## Eigennamen
+- at
+- Advantage
+- Beijing
+- Best
+- Bioinformatics
+- BizRate
+- Capitol
+- Catalyst
+- Cisco
+- Cloudtone
+- Corporation
+- Current
+- DoctorBase
+- Doodle
+- Endeworks
+- Frozen
+- Git
+- GitHub
+- Gource
+- Group
+- Hearst
+- Houston
+- IMDb
+- Infinity
+- Interactive
+- Kansai
+- Lexy
+- LinuxMag
+- Magazines
+- MedTouch
+- Melbourne
+- MusicBrainz
+- Napster
+- Nashville
+- Oasis
+- OCaml
+- Omni
+- OnLAMP
+- Open
+- Overflow
+- PerlMonks
+- Pittsburgh
+- Pobox
+- Practical
+- Ruby
+- Shadowcat
+- Shopzilla
+- SimplyClick
+- Simula
+- SocialText
+- Studios
+- Symantec
+- Takkle
+- Tamarou
+- TextMate
+- The
+- University
+- ValueClick
+- Yahoo
+# net-a-porter.com
+- com
+- net
+- porter
+
+## Moose-spezifisch
+- Metaobjektprotokoll
+- MOP
+- Mouse
+- parameterisierte
+- Objektmetaprogrammierung
+
+## Computerjargon
+- Beispielcode
+- Codewiederverwendung
+
+## anderer Jargon
+- Perlmonger
+
+## Neologismen
+
+## Verbundworte
+# Duck-Typing
+- Duck
+- Typing
+# Lese-Evaluierungs-Ausgabe-Schleife
+- Evaluierungs
+# 100%ig
+- ig
+# Perl.it
+- it
+# E-Lamp
+- Lamp
+# Meta-Moose
+- Meta
+# PDX.pm
+- PDX
+# Plug-In
+- Plug
+# diverse .pm
+- pm
+# RC-Dateien
+- RC
+
+## Umgangssprache
+
+## nicht im Wörterbuch, aber sollte drin stehen
+- Blog
+- Blogeintrag
+- Blogeinträge
+- Endbericht
+- Expertenunterricht
+- Gemeinschaftsprojekt
+- Gruppentreffen
+- Hauptdistribution
+- Hauptseite
+- Hilfeangeboten
+- nachzuschlagen
+- Objektsystem
+- Objektsystems
+- Produktionseinsatz
+- Proteinanalyse
+- rollenbasierten
+- Rollenzusammensetzung
+- Schnellreferenzkarte
+- Skriptoptionen
+- Webansicht
+
+## einzelne Fremdwörter
+- Refactoring
+- Repository
+- Repositorys
+
+## absichtlich falsch