Add the perlunitut manpage, by Juerd Waalboer

diff --git a/MANIFEST b/MANIFEST

index 715a4f7..2e9ae71 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -2963,6 +2963,7 @@ pod/perltoot.pod          Perl OO tutorial, part 1
 pod/perltrap.pod               Perl traps for the unwary
 pod/perlunicode.pod            Perl Unicode support
 pod/perluniintro.pod           Perl Unicode introduction
+pod/perlunitut.pod             Perl Unicode tutorial
 pod/perlutil.pod               utilities packaged with the Perl distribution
 pod/perlvar.pod                        Perl predefined variables
 pod/perlxs.pod                 Perl XS application programming interface
diff --git a/pod.lst b/pod.lst

index e3fb860..57014e0 100644 (file)
--- a/pod.lst
+++ b/pod.lst
@@ -78,6 +78,7 @@ h Reference Manual
   perllocale           Perl locale support
   perluniintro         Perl Unicode introduction
   perlunicode          Perl Unicode support
+  perlunitut           Perl Unicode tutorial
   perlebcdic           Considerations for running Perl on EBCDIC platforms
 
   perlsec              Perl security
diff --git a/pod/perl.pod b/pod/perl.pod

index c722d18..e00a758 100644 (file)
--- a/pod/perl.pod
+++ b/pod/perl.pod
@@ -95,6 +95,7 @@ For ease of access, the Perl manual has been split up into several sections.
     perllocale         Perl locale support
     perluniintro       Perl Unicode introduction
     perlunicode        Perl Unicode support
+    perlunitut         Perl Unicode tutorial
     perlebcdic         Considerations for running Perl on EBCDIC platforms
 
     perlsec            Perl security
diff --git a/pod/perltoc.pod b/pod/perltoc.pod

index d183bfb..1439bd9 100644 (file)
--- a/pod/perltoc.pod
+++ b/pod/perltoc.pod
@@ -3818,6 +3818,34 @@ autodetected, C<use encoding> needed to upgrade non-Latin-1 byte strings
 
 =back
 
+=head2 perlunitut - Perl Unicode Tutorial
+
+=over 4
+
+=item DESCRIPTION
+
+=over 4
+
+=item Definitions
+
+=item Your new toolkit
+
+=item I/O flow (the actual 5 minute tutorial)
+
+=item Q and A
+
+=back
+
+=item SUMMARY
+
+=item ACKNOWLEDGEMENTS
+
+=item AUTHOR
+
+=item SEE ALSO
+
+=back
+
 =head2 perlebcdic - Considerations for running Perl on EBCDIC platforms
 
 =over 4
@@ -5336,7 +5364,7 @@ PL_sv_undef X<PL_sv_undef>, PL_sv_yes X<PL_sv_yes>
 GvSV X<GvSV>, gv_const_sv X<gv_const_sv>, gv_fetchmeth X<gv_fetchmeth>,
 gv_fetchmethod_autoload X<gv_fetchmethod_autoload>, gv_fetchmeth_autoload
 X<gv_fetchmeth_autoload>, gv_stashpv X<gv_stashpv>, gv_stashpvn
-X<gv_stashpvn>, gv_stashsv X<gv_stashsv>
+X<gv_stashpvn>, gv_stashpvs X<gv_stashpvs>, gv_stashsv X<gv_stashsv>
 
 =item Handy Values
 
@@ -5351,13 +5379,13 @@ X<HeSVKEY_force>, HeSVKEY_set X<HeSVKEY_set>, HeVAL X<HeVAL>, HvNAME
 X<HvNAME>, hv_assert X<hv_assert>, hv_clear X<hv_clear>,
 hv_clear_placeholders X<hv_clear_placeholders>, hv_delete X<hv_delete>,
 hv_delete_ent X<hv_delete_ent>, hv_exists X<hv_exists>, hv_exists_ent
-X<hv_exists_ent>, hv_fetch X<hv_fetch>, hv_fetch_ent X<hv_fetch_ent>,
-hv_iterinit X<hv_iterinit>, hv_iterkey X<hv_iterkey>, hv_iterkeysv
-X<hv_iterkeysv>, hv_iternext X<hv_iternext>, hv_iternextsv
-X<hv_iternextsv>, hv_iternext_flags X<hv_iternext_flags>, hv_iterval
-X<hv_iterval>, hv_magic X<hv_magic>, hv_scalar X<hv_scalar>, hv_store
-X<hv_store>, hv_store_ent X<hv_store_ent>, hv_undef X<hv_undef>, newHV
-X<newHV>
+X<hv_exists_ent>, hv_fetch X<hv_fetch>, hv_fetchs X<hv_fetchs>,
+hv_fetch_ent X<hv_fetch_ent>, hv_iterinit X<hv_iterinit>, hv_iterkey
+X<hv_iterkey>, hv_iterkeysv X<hv_iterkeysv>, hv_iternext X<hv_iternext>,
+hv_iternextsv X<hv_iternextsv>, hv_iternext_flags X<hv_iternext_flags>,
+hv_iterval X<hv_iterval>, hv_magic X<hv_magic>, hv_scalar X<hv_scalar>,
+hv_store X<hv_store>, hv_stores X<hv_stores>, hv_store_ent X<hv_store_ent>,
+hv_undef X<hv_undef>, newHV X<newHV>
 
 =item Magical Functions
 
@@ -5372,11 +5400,11 @@ X<SvUNLOCK>
 =item Memory Management
 
 Copy X<Copy>, CopyD X<CopyD>, Move X<Move>, MoveD X<MoveD>, Newx X<Newx>,
-Newxc X<Newxc>, Newxz X<Newxz>, Poison X<Poison>, PoisonNew X<PoisonNew>,
-PoisonWith X<PoisonWith>, Renew X<Renew>, Renewc X<Renewc>, Safefree
-X<Safefree>, savepv X<savepv>, savepvn X<savepvn>, savesharedpv
-X<savesharedpv>, savesvpv X<savesvpv>, StructCopy X<StructCopy>, Zero
-X<Zero>, ZeroD X<ZeroD>
+Newxc X<Newxc>, Newxz X<Newxz>, Poison X<Poison>, PoisonFree X<PoisonFree>,
+PoisonNew X<PoisonNew>, PoisonWith X<PoisonWith>, Renew X<Renew>, Renewc
+X<Renewc>, Safefree X<Safefree>, savepv X<savepv>, savepvn X<savepvn>,
+savepvs X<savepvs>, savesharedpv X<savesharedpv>, savesvpv X<savesvpv>,
+StructCopy X<StructCopy>, Zero X<Zero>, ZeroD X<ZeroD>
 
 =item Miscellaneous Functions
 
@@ -5464,61 +5492,64 @@ X<SvPV_nolen>, SvPV_nomg X<SvPV_nomg>, SvPV_set X<SvPV_set>, SvREFCNT
 X<SvREFCNT>, SvREFCNT_dec X<SvREFCNT_dec>, SvREFCNT_inc X<SvREFCNT_inc>,
 SvREFCNT_inc_NN X<SvREFCNT_inc_NN>, SvREFCNT_inc_simple
 X<SvREFCNT_inc_simple>, SvREFCNT_inc_simple_NN X<SvREFCNT_inc_simple_NN>,
-SvREFCNT_inc_simple_void X<SvREFCNT_inc_simple_void>, SvREFCNT_inc_void
-X<SvREFCNT_inc_void>, SvREFCNT_inc_void_NN X<SvREFCNT_inc_void_NN>, SvROK
-X<SvROK>, SvROK_off X<SvROK_off>, SvROK_on X<SvROK_on>, SvRV X<SvRV>,
-SvRV_set X<SvRV_set>, SvSTASH X<SvSTASH>, SvSTASH_set X<SvSTASH_set>,
-SvTAINT X<SvTAINT>, SvTAINTED X<SvTAINTED>, SvTAINTED_off X<SvTAINTED_off>,
-SvTAINTED_on X<SvTAINTED_on>, SvTRUE X<SvTRUE>, SvTYPE X<SvTYPE>, SvUOK
-X<SvUOK>, SvUPGRADE X<SvUPGRADE>, SvUTF8 X<SvUTF8>, SvUTF8_off
-X<SvUTF8_off>, SvUTF8_on X<SvUTF8_on>, SvUV X<SvUV>, SvUVX X<SvUVX>, SvUVx
-X<SvUVx>, SvUV_nomg X<SvUV_nomg>, SvUV_set X<SvUV_set>, SvVOK X<SvVOK>,
-sv_catpvn_nomg X<sv_catpvn_nomg>, sv_catsv_nomg X<sv_catsv_nomg>,
-sv_derived_from X<sv_derived_from>, sv_report_used X<sv_report_used>,
-sv_setsv_nomg X<sv_setsv_nomg>
+SvREFCNT_inc_simple_void X<SvREFCNT_inc_simple_void>,
+SvREFCNT_inc_simple_void_NN X<SvREFCNT_inc_simple_void_NN>,
+SvREFCNT_inc_void X<SvREFCNT_inc_void>, SvREFCNT_inc_void_NN
+X<SvREFCNT_inc_void_NN>, SvROK X<SvROK>, SvROK_off X<SvROK_off>, SvROK_on
+X<SvROK_on>, SvRV X<SvRV>, SvRV_set X<SvRV_set>, SvSTASH X<SvSTASH>,
+SvSTASH_set X<SvSTASH_set>, SvTAINT X<SvTAINT>, SvTAINTED X<SvTAINTED>,
+SvTAINTED_off X<SvTAINTED_off>, SvTAINTED_on X<SvTAINTED_on>, SvTRUE
+X<SvTRUE>, SvTYPE X<SvTYPE>, SvUOK X<SvUOK>, SvUPGRADE X<SvUPGRADE>, SvUTF8
+X<SvUTF8>, SvUTF8_off X<SvUTF8_off>, SvUTF8_on X<SvUTF8_on>, SvUV X<SvUV>,
+SvUVX X<SvUVX>, SvUVx X<SvUVx>, SvUV_nomg X<SvUV_nomg>, SvUV_set
+X<SvUV_set>, SvVOK X<SvVOK>, sv_catpvn_nomg X<sv_catpvn_nomg>,
+sv_catsv_nomg X<sv_catsv_nomg>, sv_derived_from X<sv_derived_from>,
+sv_report_used X<sv_report_used>, sv_setsv_nomg X<sv_setsv_nomg>
 
 =item SV-Body Allocation
 
 looks_like_number X<looks_like_number>, newRV_noinc X<newRV_noinc>, newSV
 X<newSV>, newSVhek X<newSVhek>, newSViv X<newSViv>, newSVnv X<newSVnv>,
 newSVpv X<newSVpv>, newSVpvf X<newSVpvf>, newSVpvn X<newSVpvn>,
-newSVpvn_share X<newSVpvn_share>, newSVrv X<newSVrv>, newSVsv X<newSVsv>,
-newSVuv X<newSVuv>, sv_2bool X<sv_2bool>, sv_2cv X<sv_2cv>, sv_2io
-X<sv_2io>, sv_2iv_flags X<sv_2iv_flags>, sv_2mortal X<sv_2mortal>, sv_2nv
-X<sv_2nv>, sv_2pvbyte X<sv_2pvbyte>, sv_2pvutf8 X<sv_2pvutf8>, sv_2pv_flags
+newSVpvn_share X<newSVpvn_share>, newSVpvs X<newSVpvs>, newSVpvs_share
+X<newSVpvs_share>, newSVrv X<newSVrv>, newSVsv X<newSVsv>, newSVuv
+X<newSVuv>, sv_2bool X<sv_2bool>, sv_2cv X<sv_2cv>, sv_2io X<sv_2io>,
+sv_2iv_flags X<sv_2iv_flags>, sv_2mortal X<sv_2mortal>, sv_2nv X<sv_2nv>,
+sv_2pvbyte X<sv_2pvbyte>, sv_2pvutf8 X<sv_2pvutf8>, sv_2pv_flags
 X<sv_2pv_flags>, sv_2uv_flags X<sv_2uv_flags>, sv_backoff X<sv_backoff>,
 sv_bless X<sv_bless>, sv_catpv X<sv_catpv>, sv_catpvf X<sv_catpvf>,
 sv_catpvf_mg X<sv_catpvf_mg>, sv_catpvn X<sv_catpvn>, sv_catpvn_flags
-X<sv_catpvn_flags>, sv_catpv_mg X<sv_catpv_mg>, sv_catsv X<sv_catsv>,
-sv_catsv_flags X<sv_catsv_flags>, sv_chop X<sv_chop>, sv_clear X<sv_clear>,
-sv_cmp X<sv_cmp>, sv_cmp_locale X<sv_cmp_locale>, sv_collxfrm
-X<sv_collxfrm>, sv_copypv X<sv_copypv>, sv_dec X<sv_dec>, sv_eq X<sv_eq>,
-sv_force_normal_flags X<sv_force_normal_flags>, sv_free X<sv_free>, sv_gets
-X<sv_gets>, sv_grow X<sv_grow>, sv_inc X<sv_inc>, sv_insert X<sv_insert>,
-sv_isa X<sv_isa>, sv_isobject X<sv_isobject>, sv_len X<sv_len>, sv_len_utf8
-X<sv_len_utf8>, sv_magic X<sv_magic>, sv_magicext X<sv_magicext>,
-sv_mortalcopy X<sv_mortalcopy>, sv_newmortal X<sv_newmortal>, sv_newref
-X<sv_newref>, sv_pos_b2u X<sv_pos_b2u>, sv_pos_u2b X<sv_pos_u2b>,
-sv_pvbyten_force X<sv_pvbyten_force>, sv_pvn_force X<sv_pvn_force>,
-sv_pvn_force_flags X<sv_pvn_force_flags>, sv_pvutf8n_force
-X<sv_pvutf8n_force>, sv_reftype X<sv_reftype>, sv_replace X<sv_replace>,
-sv_reset X<sv_reset>, sv_rvweaken X<sv_rvweaken>, sv_setiv X<sv_setiv>,
-sv_setiv_mg X<sv_setiv_mg>, sv_setnv X<sv_setnv>, sv_setnv_mg
+X<sv_catpvn_flags>, sv_catpvs X<sv_catpvs>, sv_catpv_mg X<sv_catpv_mg>,
+sv_catsv X<sv_catsv>, sv_catsv_flags X<sv_catsv_flags>, sv_chop X<sv_chop>,
+sv_clear X<sv_clear>, sv_cmp X<sv_cmp>, sv_cmp_locale X<sv_cmp_locale>,
+sv_collxfrm X<sv_collxfrm>, sv_copypv X<sv_copypv>, sv_dec X<sv_dec>, sv_eq
+X<sv_eq>, sv_force_normal_flags X<sv_force_normal_flags>, sv_free
+X<sv_free>, sv_gets X<sv_gets>, sv_grow X<sv_grow>, sv_inc X<sv_inc>,
+sv_insert X<sv_insert>, sv_isa X<sv_isa>, sv_isobject X<sv_isobject>,
+sv_len X<sv_len>, sv_len_utf8 X<sv_len_utf8>, sv_magic X<sv_magic>,
+sv_magicext X<sv_magicext>, sv_mortalcopy X<sv_mortalcopy>, sv_newmortal
+X<sv_newmortal>, sv_newref X<sv_newref>, sv_pos_b2u X<sv_pos_b2u>,
+sv_pos_u2b X<sv_pos_u2b>, sv_pvbyten_force X<sv_pvbyten_force>,
+sv_pvn_force X<sv_pvn_force>, sv_pvn_force_flags X<sv_pvn_force_flags>,
+sv_pvutf8n_force X<sv_pvutf8n_force>, sv_reftype X<sv_reftype>, sv_replace
+X<sv_replace>, sv_reset X<sv_reset>, sv_rvweaken X<sv_rvweaken>, sv_setiv
+X<sv_setiv>, sv_setiv_mg X<sv_setiv_mg>, sv_setnv X<sv_setnv>, sv_setnv_mg
 X<sv_setnv_mg>, sv_setpv X<sv_setpv>, sv_setpvf X<sv_setpvf>, sv_setpvf_mg
 X<sv_setpvf_mg>, sv_setpviv X<sv_setpviv>, sv_setpviv_mg X<sv_setpviv_mg>,
-sv_setpvn X<sv_setpvn>, sv_setpvn_mg X<sv_setpvn_mg>, sv_setpv_mg
-X<sv_setpv_mg>, sv_setref_iv X<sv_setref_iv>, sv_setref_nv X<sv_setref_nv>,
-sv_setref_pv X<sv_setref_pv>, sv_setref_pvn X<sv_setref_pvn>, sv_setref_uv
-X<sv_setref_uv>, sv_setsv X<sv_setsv>, sv_setsv_flags X<sv_setsv_flags>,
-sv_setsv_mg X<sv_setsv_mg>, sv_setuv X<sv_setuv>, sv_setuv_mg
-X<sv_setuv_mg>, sv_tainted X<sv_tainted>, sv_true X<sv_true>, sv_unmagic
-X<sv_unmagic>, sv_unref_flags X<sv_unref_flags>, sv_untaint X<sv_untaint>,
-sv_upgrade X<sv_upgrade>, sv_usepvn_flags X<sv_usepvn_flags>,
-sv_utf8_decode X<sv_utf8_decode>, sv_utf8_downgrade X<sv_utf8_downgrade>,
-sv_utf8_encode X<sv_utf8_encode>, sv_utf8_upgrade X<sv_utf8_upgrade>,
-sv_utf8_upgrade_flags X<sv_utf8_upgrade_flags>, sv_vcatpvf X<sv_vcatpvf>,
-sv_vcatpvfn X<sv_vcatpvfn>, sv_vcatpvf_mg X<sv_vcatpvf_mg>, sv_vsetpvf
-X<sv_vsetpvf>, sv_vsetpvfn X<sv_vsetpvfn>, sv_vsetpvf_mg X<sv_vsetpvf_mg>
+sv_setpvn X<sv_setpvn>, sv_setpvn_mg X<sv_setpvn_mg>, sv_setpvs
+X<sv_setpvs>, sv_setpv_mg X<sv_setpv_mg>, sv_setref_iv X<sv_setref_iv>,
+sv_setref_nv X<sv_setref_nv>, sv_setref_pv X<sv_setref_pv>, sv_setref_pvn
+X<sv_setref_pvn>, sv_setref_uv X<sv_setref_uv>, sv_setsv X<sv_setsv>,
+sv_setsv_flags X<sv_setsv_flags>, sv_setsv_mg X<sv_setsv_mg>, sv_setuv
+X<sv_setuv>, sv_setuv_mg X<sv_setuv_mg>, sv_tainted X<sv_tainted>, sv_true
+X<sv_true>, sv_unmagic X<sv_unmagic>, sv_unref_flags X<sv_unref_flags>,
+sv_untaint X<sv_untaint>, sv_upgrade X<sv_upgrade>, sv_usepvn_flags
+X<sv_usepvn_flags>, sv_utf8_decode X<sv_utf8_decode>, sv_utf8_downgrade
+X<sv_utf8_downgrade>, sv_utf8_encode X<sv_utf8_encode>, sv_utf8_upgrade
+X<sv_utf8_upgrade>, sv_utf8_upgrade_flags X<sv_utf8_upgrade_flags>,
+sv_vcatpvf X<sv_vcatpvf>, sv_vcatpvfn X<sv_vcatpvfn>, sv_vcatpvf_mg
+X<sv_vcatpvf_mg>, sv_vsetpvf X<sv_vsetpvf>, sv_vsetpvfn X<sv_vsetpvfn>,
+sv_vsetpvf_mg X<sv_vsetpvf_mg>
 
 =item Unicode Support
 
@@ -10597,6 +10628,16 @@ threads->detach(), threads->self(), $thr->tid(), threads->tid(),
 threads->object($tid), threads->yield(), threads->list(),
 $thr1->equal($thr2), async BLOCK;, $thr->_handle(), threads->_handle()
 
+=item THREAD CONTEXT
+
+=over 4
+
+=item Explicit context
+
+=item Implicit context
+
+=back
+
 =item THREAD STACK SIZE
 
 threads->get_stack_size();, $size = $thr->get_stack_size();, $old_size =
@@ -12160,7 +12201,7 @@ redoop, nextop, lastop
 
 =item B::COP Methods
 
-label, stash, stashpv, file, cop_seq, arybase, line, warnings, io
+label, stash, stashpv, file, cop_seq, arybase, line, warnings, io, hints
 
 =back
 
@@ -12358,10 +12399,11 @@ B<~>
 
 B<#>I<var>, B<#>I<var>I<N>, B<#>I<Var>, B<#addr>, B<#arg>, B<#class>,
 B<#classsym>, B<#coplabel>, B<#exname>, B<#extarg>, B<#firstaddr>,
-B<#flags>, B<#flagval>, B<#hyphseq>, B<#label>, B<#lastaddr>, B<#name>,
-B<#NAME>, B<#next>, B<#nextaddr>, B<#noise>, B<#private>, B<#privval>,
-B<#seq>, B<#seqnum>, B<#opt>, B<#static>, B<#sibaddr>, B<#svaddr>,
-B<#svclass>, B<#svval>, B<#targ>, B<#targarg>, B<#targarglife>, B<#typenum>
+B<#flags>, B<#flagval>, B<#hints>, B<#hintsval>, B<#hyphseq>, B<#label>,
+B<#lastaddr>, B<#name>, B<#NAME>, B<#next>, B<#nextaddr>, B<#noise>,
+B<#private>, B<#privval>, B<#seq>, B<#seqnum>, B<#opt>, B<#static>,
+B<#sibaddr>, B<#svaddr>, B<#svclass>, B<#svval>, B<#targ>, B<#targarg>,
+B<#targarglife>, B<#typenum>
 
 =back
 
@@ -12469,8 +12511,8 @@ strict, $[, bytes, utf8, integer, re, warnings, hint_bits, warning_bits
 
 =item OPTIONS AND LINT CHECKS
 
-B<context>, B<implicit-read> and B<implicit-write>, B<bare-subs>,
-B<dollar-underscore>, B<private-names>, B<undefined-subs>,
+B<magic-diamond>, B<context>, B<implicit-read> and B<implicit-write>,
+B<bare-subs>, B<dollar-underscore>, B<private-names>, B<undefined-subs>,
 B<regexp-variables>, B<all>, B<none>
 
 =item NON LINT-CHECK OPTIONS
@@ -12479,6 +12521,11 @@ B<-u Package>
 
 =item EXTENDING LINT
 
+=item TODO
+
+while(<FH>) stomps $_, strict oo, unchecked system calls, more tests,
+validate against older perls
+
 =item BUGS
 
 =item AUTHOR
@@ -15006,40 +15053,39 @@ C<d_attribute_format>, C<d_attribute_malloc>, C<d_attribute_nonnull>,
 C<d_attribute_noreturn>, C<d_attribute_pure>, C<d_attribute_unused>,
 C<d_attribute_warn_unused_result>, C<d_bcmp>, C<d_bcopy>, C<d_bsd>,
 C<d_bsdgetpgrp>, C<d_bsdsetpgrp>, C<d_builtin_choose_expr>,
-C<d_builtin_expect>, C<d_bzero>, C<d_casti32>, C<d_castneg>, C<d_charvspr>,
-C<d_chown>, C<d_chroot>, C<d_chsize>, C<d_class>, C<d_clearenv>,
-C<d_closedir>, C<d_cmsghdr_s>, C<d_const>, C<d_copysignl>, C<d_crypt>,
-C<d_crypt_r>, C<d_csh>, C<d_ctermid_r>, C<d_ctime_r>,
-C<d_c99_variadic_macros>, C<d_cuserid>, C<d_dbl_dig>, C<d_dbminitproto>,
-C<d_difftime>, C<d_dirfd>, C<d_dirnamlen>, C<d_dlerror>, C<d_dlopen>,
-C<d_dlsymun>, C<d_dosuid>, C<d_drand48_r>, C<d_drand48proto>, C<d_dup2>,
-C<d_eaccess>, C<d_endgrent>, C<d_endgrent_r>, C<d_endhent>,
-C<d_endhostent_r>, C<d_endnent>, C<d_endnetent_r>, C<d_endpent>,
-C<d_endprotoent_r>, C<d_endpwent>, C<d_endpwent_r>, C<d_endsent>,
-C<d_endservent_r>, C<d_eofnblk>, C<d_eunice>, C<d_faststdio>, C<d_fchdir>,
-C<d_fchmod>, C<d_fchown>, C<d_fcntl>, C<d_fcntl_can_lock>, C<d_fd_macros>,
-C<d_fd_set>, C<d_fds_bits>, C<d_fgetpos>, C<d_finite>, C<d_finitel>,
-C<d_flexfnam>, C<d_flock>, C<d_flockproto>, C<d_fork>, C<d_fp_class>,
-C<d_fpathconf>, C<d_fpclass>, C<d_fpclassify>, C<d_fpclassl>,
-C<d_fpos64_t>, C<d_frexpl>, C<d_fs_data_s>, C<d_fseeko>, C<d_fsetpos>,
-C<d_fstatfs>, C<d_fstatvfs>, C<d_fsync>, C<d_ftello>, C<d_ftime>,
-C<d_futimes>, C<d_Gconvert>, C<d_getcwd>, C<d_getespwnam>, C<d_getfsstat>,
-C<d_getgrent>, C<d_getgrent_r>, C<d_getgrgid_r>, C<d_getgrnam_r>,
-C<d_getgrps>, C<d_gethbyaddr>, C<d_gethbyname>, C<d_gethent>,
-C<d_gethname>, C<d_gethostbyaddr_r>, C<d_gethostbyname_r>,
-C<d_gethostent_r>, C<d_gethostprotos>, C<d_getitimer>, C<d_getlogin>,
-C<d_getlogin_r>, C<d_getmnt>, C<d_getmntent>, C<d_getnbyaddr>,
-C<d_getnbyname>, C<d_getnent>, C<d_getnetbyaddr_r>, C<d_getnetbyname_r>,
-C<d_getnetent_r>, C<d_getnetprotos>, C<d_getpagsz>, C<d_getpbyname>,
-C<d_getpbynumber>, C<d_getpent>, C<d_getpgid>, C<d_getpgrp>, C<d_getpgrp2>,
-C<d_getppid>, C<d_getprior>, C<d_getprotobyname_r>,
-C<d_getprotobynumber_r>, C<d_getprotoent_r>, C<d_getprotoprotos>,
-C<d_getprpwnam>, C<d_getpwent>, C<d_getpwent_r>, C<d_getpwnam_r>,
-C<d_getpwuid_r>, C<d_getsbyname>, C<d_getsbyport>, C<d_getsent>,
-C<d_getservbyname_r>, C<d_getservbyport_r>, C<d_getservent_r>,
-C<d_getservprotos>, C<d_getspnam>, C<d_getspnam_r>, C<d_gettimeod>,
-C<d_gmtime_r>, C<d_gnulibc>, C<d_grpasswd>, C<d_hasmntopt>, C<d_htonl>,
-C<d_ilogbl>, C<d_inc_version_list>, C<d_index>, C<d_inetaton>,
+C<d_builtin_expect>, C<d_bzero>, C<d_c99_variadic_macros>, C<d_casti32>,
+C<d_castneg>, C<d_charvspr>, C<d_chown>, C<d_chroot>, C<d_chsize>,
+C<d_class>, C<d_clearenv>, C<d_closedir>, C<d_cmsghdr_s>, C<d_const>,
+C<d_copysignl>, C<d_crypt>, C<d_crypt_r>, C<d_csh>, C<d_ctermid_r>,
+C<d_ctime_r>, C<d_cuserid>, C<d_dbl_dig>, C<d_dbminitproto>, C<d_difftime>,
+C<d_dirfd>, C<d_dirnamlen>, C<d_dlerror>, C<d_dlopen>, C<d_dlsymun>,
+C<d_dosuid>, C<d_drand48_r>, C<d_drand48proto>, C<d_dup2>, C<d_eaccess>,
+C<d_endgrent>, C<d_endgrent_r>, C<d_endhent>, C<d_endhostent_r>,
+C<d_endnent>, C<d_endnetent_r>, C<d_endpent>, C<d_endprotoent_r>,
+C<d_endpwent>, C<d_endpwent_r>, C<d_endsent>, C<d_endservent_r>,
+C<d_eofnblk>, C<d_eunice>, C<d_faststdio>, C<d_fchdir>, C<d_fchmod>,
+C<d_fchown>, C<d_fcntl>, C<d_fcntl_can_lock>, C<d_fd_macros>, C<d_fd_set>,
+C<d_fds_bits>, C<d_fgetpos>, C<d_finite>, C<d_finitel>, C<d_flexfnam>,
+C<d_flock>, C<d_flockproto>, C<d_fork>, C<d_fp_class>, C<d_fpathconf>,
+C<d_fpclass>, C<d_fpclassify>, C<d_fpclassl>, C<d_fpos64_t>, C<d_frexpl>,
+C<d_fs_data_s>, C<d_fseeko>, C<d_fsetpos>, C<d_fstatfs>, C<d_fstatvfs>,
+C<d_fsync>, C<d_ftello>, C<d_ftime>, C<d_futimes>, C<d_Gconvert>,
+C<d_getcwd>, C<d_getespwnam>, C<d_getfsstat>, C<d_getgrent>,
+C<d_getgrent_r>, C<d_getgrgid_r>, C<d_getgrnam_r>, C<d_getgrps>,
+C<d_gethbyaddr>, C<d_gethbyname>, C<d_gethent>, C<d_gethname>,
+C<d_gethostbyaddr_r>, C<d_gethostbyname_r>, C<d_gethostent_r>,
+C<d_gethostprotos>, C<d_getitimer>, C<d_getlogin>, C<d_getlogin_r>,
+C<d_getmnt>, C<d_getmntent>, C<d_getnbyaddr>, C<d_getnbyname>,
+C<d_getnent>, C<d_getnetbyaddr_r>, C<d_getnetbyname_r>, C<d_getnetent_r>,
+C<d_getnetprotos>, C<d_getpagsz>, C<d_getpbyname>, C<d_getpbynumber>,
+C<d_getpent>, C<d_getpgid>, C<d_getpgrp>, C<d_getpgrp2>, C<d_getppid>,
+C<d_getprior>, C<d_getprotobyname_r>, C<d_getprotobynumber_r>,
+C<d_getprotoent_r>, C<d_getprotoprotos>, C<d_getprpwnam>, C<d_getpwent>,
+C<d_getpwent_r>, C<d_getpwnam_r>, C<d_getpwuid_r>, C<d_getsbyname>,
+C<d_getsbyport>, C<d_getsent>, C<d_getservbyname_r>, C<d_getservbyport_r>,
+C<d_getservent_r>, C<d_getservprotos>, C<d_getspnam>, C<d_getspnam_r>,
+C<d_gettimeod>, C<d_gmtime_r>, C<d_gnulibc>, C<d_grpasswd>, C<d_hasmntopt>,
+C<d_htonl>, C<d_ilogbl>, C<d_inc_version_list>, C<d_index>, C<d_inetaton>,
 C<d_int64_t>, C<d_isascii>, C<d_isfinite>, C<d_isinf>, C<d_isnan>,
 C<d_isnanl>, C<d_killpg>, C<d_lchown>, C<d_ldbl_dig>,
 C<d_libm_lib_version>, C<d_link>, C<d_localtime_r>, C<d_locconv>,
@@ -15701,32 +15747,6 @@ Dumper
 
 =back
 
-=head2 Devel::Arena - Perl extension for inspecting the core's arena
-structures
-
-=over 4
-
-=item SYNOPSIS
-
-=item DESCRIPTION
-
-=over 4
-
-=item EXPORT
-
-sv_stats [DONT_SHARE], shared_string_table, sizes, HEK_size STRING,
-shared_string_table_effectiveness, write_stats_at_END
-
-=back
-
-=item SEE ALSO
-
-=item AUTHOR
-
-=item COPYRIGHT AND LICENSE
-
-=back
-
 =head2 Devel::DProf - a Perl code profiler
 
 =over 4
@@ -15781,9 +15801,9 @@ shared_string_table_effectiveness, write_stats_at_END
 
 =item Perl API not supported by ppport.h
 
-perl 5.9.3, perl 5.9.2, perl 5.9.1, perl 5.9.0, perl 5.8.3, perl 5.8.1,
-perl 5.8.0, perl 5.7.3, perl 5.7.2, perl 5.7.1, perl 5.6.1, perl 5.6.0,
-perl 5.005_03, perl 5.005, perl 5.004_05, perl 5.004
+perl 5.9.4, perl 5.9.3, perl 5.9.2, perl 5.9.1, perl 5.9.0, perl 5.8.3,
+perl 5.8.1, perl 5.8.0, perl 5.7.3, perl 5.7.2, perl 5.7.1, perl 5.6.1,
+perl 5.6.0, perl 5.005_03, perl 5.005, perl 5.004_05, perl 5.004
 
 =back
 
@@ -15857,59 +15877,6 @@ perl 5.005_03, perl 5.005, perl 5.004_05, perl 5.004
 
 =back
 
-=head2 Devel::Size - Perl extension for finding the memory usage of Perl
-variables
-
-=over 4
-
-=item SYNOPSIS
-
-=item DESCRIPTION
-
-=item FUNCTIONS
-
-=over 4
-
-=item size($ref)
-
-=item total_size($ref)
-
-=back
-
-=item EXPORT
-
-=item UNDERSTANDING MEMORY ALLOCATION
-
-=over 4
-
-=item The C library
-
-=item Perl
-
-=back
-
-=item DANGERS
-
-=item Messages: texts originating from this module.
-
-=over 4
-
-=item Errors
-
-=item warnings
-
-=back
-
-=item BUGS
-
-=item AUTHOR
-
-=item COPYRIGHT
-
-=item SEE ALSO
-
-=back
-
 =head2 Digest - Modules that calculate message digests
 
 =over 4
@@ -15990,15 +15957,16 @@ B<sha224_base64($data, ...)>, B<sha256_base64($data, ...)>,
 B<sha384_base64($data, ...)>, B<sha512_base64($data, ...)>, B<new($alg)>,
 B<reset($alg)>, B<hashsize>, B<algorithm>, B<clone>, B<add($data, ...)>,
 B<add_bits($data, $nbits)>, B<add_bits($bits)>, B<addfile(*FILE)>,
-B<dump($filename)>, B<load($filename)>, B<digest>, B<hexdigest>,
-B<b64digest>, B<hmac_sha1($data, $key)>, B<hmac_sha224($data, $key)>,
-B<hmac_sha256($data, $key)>, B<hmac_sha384($data, $key)>,
-B<hmac_sha512($data, $key)>, B<hmac_sha1_hex($data, $key)>,
-B<hmac_sha224_hex($data, $key)>, B<hmac_sha256_hex($data, $key)>,
-B<hmac_sha384_hex($data, $key)>, B<hmac_sha512_hex($data, $key)>,
-B<hmac_sha1_base64($data, $key)>, B<hmac_sha224_base64($data, $key)>,
-B<hmac_sha256_base64($data, $key)>, B<hmac_sha384_base64($data, $key)>,
-B<hmac_sha512_base64($data, $key)>
+B<addfile($filename [, $mode])>, B<"b">     read file in binary mode,
+B<"p">       use portable mode, B<dump($filename)>, B<load($filename)>,
+B<digest>, B<hexdigest>, B<b64digest>, B<hmac_sha1($data, $key)>,
+B<hmac_sha224($data, $key)>, B<hmac_sha256($data, $key)>,
+B<hmac_sha384($data, $key)>, B<hmac_sha512($data, $key)>,
+B<hmac_sha1_hex($data, $key)>, B<hmac_sha224_hex($data, $key)>,
+B<hmac_sha256_hex($data, $key)>, B<hmac_sha384_hex($data, $key)>,
+B<hmac_sha512_hex($data, $key)>, B<hmac_sha1_base64($data, $key)>,
+B<hmac_sha224_base64($data, $key)>, B<hmac_sha256_base64($data, $key)>,
+B<hmac_sha384_base64($data, $key)>, B<hmac_sha512_base64($data, $key)>
 
 =item SEE ALSO
 
@@ -26222,14 +26190,14 @@ path not available
 
 =item SEE ALSO
 
-=item AUTHOR
+=item AUTHORS
 
 =item BUGS
 
 =item SUPPORT
 
 AnnoCPAN: Annotated CPAN documentation, CPAN Ratings, RT: CPAN's request
-tracker, Search CPAN
+tracker, Search CPAN, Kobes' CPAN Search, Perl Documentation
 
 =item LICENSE
 
@@ -26281,14 +26249,14 @@ path not available
 
 =item SEE ALSO
 
-=item AUTHOR
+=item AUTHORS
 
 =item BUGS
 
 =item SUPPORT
 
 AnnoCPAN: Annotated CPAN documentation, CPAN Ratings, RT: CPAN's request
-tracker, Search CPAN
+tracker, Search CPAN, Kobes' CPAN Search, Perl Documentation
 
 =item LICENSE
 
diff --git a/pod/perlunitut.pod b/pod/perlunitut.pod

new file mode 100644 (file)

index 0000000..ae8d0b1
--- /dev/null
+++ b/pod/perlunitut.pod
@@ -0,0 +1,425 @@
+=head1 NAME
+
+perlunitut - Perl Unicode Tutorial
+
+=head1 DESCRIPTION
+
+The days of just flinging strings around are over. It's well established that
+modern programs need to be capable of communicating funny accented letters, and
+things like euro symbols. This means that programmers need new habits. It's
+easy to program Unicode capable software, but it does require discipline to do
+it right.
+
+There's a lot to know about character sets and text encodings. It's probably
+best to spend a full day learning all this, but the basics can be learned in
+minutes.
+
+These are not the very basics, though. It is assumed that you already
+know the difference between bytes and characters, and realize (and accept!)
+that there are many different character sets and encodings, and that your
+program has to be explicit about them. Recommended reading is "The Absolute
+Minimum Every Software Developer Absolutely, Positively Must Know About Unicode
+and Character Sets (No Excuses!)" by Joel Spolsky, at
+L<http://joelonsoftware.com/articles/Unicode.html>.
+
+This tutorial speaks in rather absolute terms, and provides only a limited view
+of the wealth of character string related features that Perl has to offer. For
+most projects, this information will probably suffice.
+
+=head2 Definitions
+
+It's important to set a few things straight first. This is the most important
+part of this tutorial. This view may conflict with other information that you
+may have found on the web, but that's mostly because many sources are wrong.
+
+You may have to re-read this entire section a few times...
+
+=head3 Unicode
+
+B<Unicode> is a character set with room for lots of characters. The ordinal
+value of a character is called a B<code point>. 
+
+There are many, many code points, but computers work with bytes, and a byte can
+have only 256 values. Unicode has many more characters, so you need a method
+to make these accessible.
+
+Unicode is encoded using several competing encodings, of which UTF-8 is the
+most used. In a Unicode encoding, multiple consecutive bytes can be used to
+store a single code point, or simply: character.
+
+=head3 UTF-8
+
+B<UTF-8> is a Unicode encoding. Many people think that Unicode and UTF-8 are
+the same thing, but they're not. There are more Unicode encodings, but much of
+the world has standardized on UTF-8. 
+
+UTF-8 treats the first 128 code points, 0..127, the same as ASCII. They take
+only one byte per character. All other characters are encoded as two to four
+bytes using a complex scheme. Fortunately, Perl handles this for
+us, so we don't have to worry about this.
+
+=head3 Text strings (character strings)
+
+B<Text strings>, or B<character strings>, are made of characters. Bytes are
+irrelevant here, and so are encodings. Each character is just that: the
+character.
+
+On a text string, you would do things like:
+
+    $text =~ s/foo/bar/;
+    if ($string =~ /^\d+$/) { ... }
+    $text = ucfirst $text;
+    my $character_count = length $text;
+
+The value of a character (C<ord>, C<chr>) is the corresponding Unicode code
+point.
+
+=head3 Binary strings (byte strings)
+
+B<Binary strings>, or B<byte strings>, are made of bytes. Here, you don't have
+characters, just bytes. All communication with the outside world (anything
+outside of your current Perl process) is done in binary.
+
+On a binary string, you would do things like:
+
+    my (@length_content) = unpack "(V/a)*", $binary;
+    $binary =~ s/\x00\x0F/\xFF\xF0/;  # for the brave :)
+    print {$fh} $binary;
+    my $byte_count = length $binary;
+
+=head3 Encoding
+
+B<Encoding> (as a verb) is the conversion from I<text> to I<binary>. To encode,
+you have to supply the target encoding, for example C<iso-8859-1> or C<UTF-8>.
+Some encodings, like the C<iso-8859> ("latin") range, do not support the full
+Unicode standard; characters that can't be represented are lost in the
+conversion.
+
+=head3 Decoding
+
+B<Decoding> is the conversion from I<binary> to I<text>. To decode, you have to
+know what encoding was used during the encoding phase. And most of all, it must
+be something decodable. It doesn't make much sense to decode a PNG image into a
+text string.
+
+=head3 Internal format
+
+Perl has an B<internal format>, an encoding that it uses to encode text strings
+so it can store them in memory. All text strings are in this internal format.
+In fact, text strings are never in any other format!
+
+You shouldn't worry about what this format is, because conversion is
+automatically done when you decode or encode.
+
+=head2 Your new toolkit
+
+Add to your standard heading the following line:
+
+    use Encode qw(encode decode);
+
+Or, if you're lazy, just:
+
+    use Encode;
+
+=head2 I/O flow (the actual 5 minute tutorial)
+
+The typical input/output flow of a program is:
+
+    1. Receive and decode
+    2. Process
+    3. Encode and output
+
+If your input is binary, and is supposed to remain binary, you shouldn't decode
+it to a text string, of course. But in all other cases, you should decode it.
+
+Decoding can't happen reliably if you don't know how the data was encoded. If
+you get to choose, it's a good idea to standardize on UTF-8.
+
+    my $foo   = decode('UTF-8', get 'http://example.com/');
+    my $bar   = decode('ISO-8859-1', readline STDIN);
+    my $xyzzy = decode('Windows-1251', $cgi->param('foo'));
+
+Processing happens as you knew before. The only difference is that you're now
+using characters instead of bytes. That's very useful if you use things like
+C<substr>, or C<length>.
+
+It's important to realize that there are no bytes in a text string. Of course,
+Perl has its internal encoding to store the string in memory, but ignore that.
+If you have to do anything with the number of bytes, it's probably best to move
+that part to step 3, just after you've encoded the string. Then you know
+exactly how many bytes it will be in the destination string.
+
+The syntax for encoding text strings to binary strings is as simple as decoding:
+
+    $body = encode('UTF-8', $body);
+
+If you needed to know the length of the string in bytes, now's the perfect time
+for that. Because C<$body> is now a byte string, C<length> will report the
+number of bytes, instead of the number of characters. The number of
+characters is no longer known, because characters only exist in text strings.
+
+    my $byte_count = length $body;
+
+And if the protocol you're using supports a way of letting the recipient know
+which character encoding you used, please help the receiving end by using that
+feature! For example, E-mail and HTTP support MIME headers, so you can use the
+C<Content-Type> header. They can also have C<Content-Length> to indicate the
+number of I<bytes>, which is always a good idea to supply if the number is
+known.
+
+    "Content-Type: text/plain; charset=UTF-8",
+    "Content-Length: $byte_count"
+
+=head2 Q and A
+
+=head3 This isn't really a Unicode tutorial, is it?
+
+No, Perl has an abstracted interface for all supported character encodings, so
+this is actually a generic C<Encode> tutorial. But many people think that
+Unicode is special and magical, and I didn't want to disappoint them, so I
+decided to call this document a Unicode tutorial.
+
+=head3 What about binary data, like images?
+
+Well, apart from a bare C<binmode $fh>, you shouldn't treat them specially.
+(The binmode is needed because otherwise Perl may convert line endings on Win32
+systems.)
+
+Be careful, though, to never combine text strings with binary strings. If you
+need text in a binary stream, encode your text strings first using the
+appropriate encoding, then join them with binary strings. See also: "What if I
+don't encode?".
+
+=head3 What about the UTF-8 flag?
+
+Please, unless you're hacking the internals, or debugging weirdness, don't
+think about the UTF-8 flag at all. That means that you very probably shouldn't
+use C<is_utf8>, C<_utf8_on> or C<_utf8_off> at all.
+
+Perl's internal format happens to be UTF-8. Unfortunately, Perl can't keep a
+secret, so everyone knows about this.  That is the source of much confusion.
+It's better to pretend that the internal format is some unknown encoding,
+and that you always have to encode and decode explicitly.
+
+=head3 When should I decode or encode?
+
+Whenever you're communicating with anything that is external to your perl
+process, like a database, a text file, a socket, or another program. Even if
+the thing you're communicating with is also written in Perl.
+
+=head3 What if I don't decode?
+
+Whenever your encoded, binary string is used together with a text string, Perl
+will assume that your binary string was encoded with ISO-8859-1, also known as
+latin-1. If it wasn't latin-1, then your data is unpleasantly converted. For
+example, if it was UTF-8, the individual bytes of multibyte characters are seen
+as separate characters, and then again converted to UTF-8. Such double encoding
+can be compared to double HTML encoding (C<&amp;gt;>), or double URI encoding
+(C<%253E>).
+
+This silent implicit decoding is known as "upgrading". That may sound
+positive, but it's best to avoid it.
+
+=head3 What if I don't encode?
+
+Your text string will be sent using the bytes in Perl's internal format. In
+some cases, Perl will warn you that you're doing something wrong, with a
+friendly warning:
+
+    Wide character in print at example.pl line 2.
+
+The internal format is often UTF-8, which makes these bugs hard to spot,
+because UTF-8 is usually the encoding you wanted! But don't be lazy, and don't
+use the fact that Perl's internal format is UTF-8 to your advantage. Encode
+explicitly to avoid weird bugs, and to show to maintenance programmers that you
+thought this through.
+
+=head3 Is there a way to automatically decode or encode?
+
+If all data that comes from a certain handle is encoded in exactly the same
+way, you can tell the PerlIO system to automatically decode everything, with
+the C<encoding> layer. If you do this, you can't accidentally forget to decode
+or encode anymore, on things that use the layered handle.
+
+You can provide this layer when C<open>ing the file:
+
+    open my $fh, '>:encoding(UTF-8)', $filename;  # auto encoding on write
+    open my $fh, '<:encoding(UTF-8)', $filename;  # auto decoding on read
+
+Or if you already have an open filehandle:
+
+    binmode $fh, ':encoding(UTF-8)';
+
+Some database drivers for DBI can also automatically encode and decode, but
+that is typically limited to the UTF-8 encoding, because they cheat.
+
+=head3 Cheat?! Tell me, how can I cheat?
+
+Well, because Perl's internal format is UTF-8, you can just skip the encoding
+or decoding step, and manipulate the UTF-8 flag directly.
+
+Instead of C<:encoding(UTF-8)>, you can simply use C<:utf8>. This is widely
+accepted as good behavior.
+
+Instead of C<decode> and C<encode>, you could use C<_utf8_on> and C<_utf8_off>.
+But this is, contrary to C<:utf8>, considered bad style.
+
+There are some shortcuts for one-liners; see C<-C> in L<perlrun>.
+
+=head3 What if I don't know which encoding was used?
+
+Do whatever you can to find out, and if you have to: guess. (Don't forget to
+document your guess with a comment.)
+
+You could open the document in a web browser, and change the character set or
+character encoding until you can visually confirm that all characters look the
+way they should.
+
+There is no way to reliably detect the encoding automatically, so if people
+keep sending you data without charset indication, you may have to educate them.
+
+=head3 Can I use Unicode in my Perl sources?
+
+Yes, you can! If your sources are UTF-8 encoded, you can indicate that with the
+C<use utf8> pragma.
+
+    use utf8;
+
+This doesn't do anything to your input, or to your output. It only influences
+the way your sources are read. You can use Unicode in string literals, in
+identifiers (but they still have to be "word characters" according to C<\w>),
+and even in custom delimiters.
+
+=head3 Data::Dumper doesn't restore the UTF-8 flag; is it broken?
+
+No, Data::Dumper's Unicode abilities are as they should be. There have been
+some complaints that it should restore the UTF-8 flag when the data is read
+again with C<eval>. However, you should really not look at the flag, and
+nothing indicates that Data::Dumper should break this rule.
+
+Here's what happens: when Perl reads in a string literal, it sticks to 8 bit
+encoding as long as it can. (But perhaps originally it was internally encoded
+as UTF-8, when you dumped it.) When it has to give that up because other
+characters are added to the text string, it silently upgrades the string to
+UTF-8. 
+
+If you properly encode your strings for output, none of this is of your
+concern, and you can just C<eval> dumped data as always.
+
+=head3 How can I determine if a string is a text string or a binary string?
+
+You can't. Some use the UTF-8 flag for this, but that's misuse, and makes well
+behaved modules like Data::Dumper look bad. The flag is useless for this
+purpose, because it's off when an 8 bit encoding (by default ISO-8859-1) is
+used to store the string.
+
+This is something you, the programmer, have to keep track of; sorry. You could
+consider adopting a kind of "Hungarian notation" to help with this.
+
+=head3 How do I convert from encoding FOO to encoding BAR?
+
+By first converting the FOO-encoded byte string to a text string, and then the
+text string to a BAR-encoded byte string:
+
+    my $text_string = decode('FOO', $foo_string);
+    my $bar_string  = encode('BAR', $text_string);
+
+or by skipping the text string part, and going directly from one binary
+encoding to the other:
+
+    use Encode qw(from_to);
+    from_to($string, 'FOO', 'BAR');  # changes contents of $string
+
+or by letting automatic decoding and encoding do all the work:
+
+    open my $foofh, '<:encoding(FOO)', 'example.foo.txt';
+    open my $barfh, '>:encoding(BAR)', 'example.bar.txt';
+    print { $barfh } $_ while <$foofh>;
+
+=head3 What about the C<use bytes> pragma?
+
+Don't use it. It makes no sense to deal with bytes in a text string, and it
+makes no sense to deal with characters in a byte string. Do the proper
+conversions (by decoding/encoding), and things will work out well: you get
+character counts for decoded data, and byte counts for encoded data.
+
+C<use bytes> is usually a failed attempt to do something useful. Just forget
+about it.
+
+=head3 What are C<decode_utf8> and C<encode_utf8>?
+
+These are alternate syntaxes for C<decode('utf8', ...)> and C<encode('utf8',
+...)>.
+
+=head3 What's the difference between C<UTF-8> and C<utf8>?
+
+C<UTF-8> is the official standard. C<utf8> is Perl's way of being liberal in
+what it accepts. If you have to communicate with things that aren't so liberal,
+you may want to consider using C<UTF-8>. If you have to communicate with things
+that are too liberal, you may have to use C<utf8>. The full explanation is in
+L<Encode>.
+
+C<UTF-8> is internally known as C<utf-8-strict>. This tutorial uses UTF-8
+consistently, even where utf8 is actually used internally, because the
+distinction can be hard to make, and is mostly irrelevant.
+
+Okay, if you insist: the "internal format" is utf8, not UTF-8. (When it's not
+some other encoding.)
+
+=head3 I lost track; what encoding is the internal format really?
+
+It's good that you lost track, because you shouldn't depend on the internal
+format being any specific encoding. But since you asked: by default, the
+internal format is either ISO-8859-1 (latin-1), or utf8, depending on the
+history of the string.
+
+Perl knows how it stored the string internally, and will use that knowledge
+when you C<encode>. In other words: don't try to find out what the internal
+encoding for a certain string is, but instead just encode it into the encoding
+that you want.
+
+=head3 What character encodings does Perl support?
+
+To find out which character encodings your Perl supports, run:
+
+    perl -MEncode -le "print for Encode->encodings(':all')"
+
+=head3 Which version of perl should I use?
+
+Well, if you can, upgrade to the most recent, but certainly C<5.8.1> or newer.
+This tutorial is based on the status quo as of C<5.8.7>.
+
+You should also check your modules, and upgrade them if necessary. For example,
+HTML::Entities requires version >= 1.32 to function correctly, even though the
+changelog is silent about this.
+
+=head1 SUMMARY
+
+Decode everything you receive, encode everything you send out. (If it's text
+data.)
+
+=head1 ACKNOWLEDGEMENTS
+
+Thanks to Johan Vromans from Squirrel Consultancy. His UTF-8 rants during the
+Amsterdam Perl Mongers meetings got me interested and determined to find out
+how to use character encodings in Perl in ways that don't break easily.
+
+Thanks to Gerard Goossen from TTY. His presentation "UTF-8 in the wild" (Dutch
+Perl Workshop 2006) inspired me to publish my thoughts and write this tutorial.
+
+Thanks to the people who asked about this kind of stuff in several Perl IRC
+channels, and have constantly reminded me that a simpler explanation was
+needed.
+
+Thanks to the people who reviewed this document for me, before it went public.
+They are: Benjamin Smith, Jan-Pieter Cornet, Johan Vromans, Lukas Mai, Nathan
+Gray.
+
+=head1 AUTHOR
+
+Juerd Waalboer <juerd@cpan.org>
+
+=head1 SEE ALSO
+
+L<perlunicode>, L<perluniintro>, L<Encode>
+
diff --git a/vms/descrip_mms.template b/vms/descrip_mms.template

index 6a5bc9b..29c1e44 100644 (file)
--- a/vms/descrip_mms.template
+++ b/vms/descrip_mms.template
@@ -411,9 +411,10 @@ pod21 = [.lib.pods]perlpragma.pod [.lib.pods]perlqnx.pod [.lib.pods]perlre.pod [
 pod22 = [.lib.pods]perlreref.pod [.lib.pods]perlretut.pod [.lib.pods]perlriscos.pod [.lib.pods]perlrun.pod [.lib.pods]perlsec.pod [.lib.pods]perlsolaris.pod
 pod23 = [.lib.pods]perlstyle.pod [.lib.pods]perlsub.pod [.lib.pods]perlsymbian.pod [.lib.pods]perlsyn.pod [.lib.pods]perlthrtut.pod [.lib.pods]perltie.pod
 pod24 = [.lib.pods]perltoc.pod [.lib.pods]perltodo.pod [.lib.pods]perltooc.pod [.lib.pods]perltoot.pod [.lib.pods]perltrap.pod [.lib.pods]perltru64.pod
-pod25 = [.lib.pods]perltw.pod [.lib.pods]perlunicode.pod [.lib.pods]perluniintro.pod [.lib.pods]perlutil.pod [.lib.pods]perluts.pod [.lib.pods]perlvar.pod
-pod26 = [.lib.pods]perlvmesa.pod [.lib.pods]perlvms.pod [.lib.pods]perlvos.pod [.lib.pods]perlwin32.pod [.lib.pods]perlxs.pod [.lib.pods]perlxstut.pod
-pod = $(pod0) $(pod1) $(pod2) $(pod3) $(pod4) $(pod5) $(pod6) $(pod7) $(pod8) $(pod9) $(pod10) $(pod11) $(pod12) $(pod13) $(pod14) $(pod15) $(pod16) $(pod17) $(pod18) $(pod19) $(pod20) $(pod21) $(pod22) $(pod23) $(pod24) $(pod25) $(pod26)
+pod25 = [.lib.pods]perltw.pod [.lib.pods]perlunicode.pod [.lib.pods]perluniintro.pod [.lib.pods]perlunitut.pod [.lib.pods]perlutil.pod [.lib.pods]perluts.pod
+pod26 = [.lib.pods]perlvar.pod [.lib.pods]perlvmesa.pod [.lib.pods]perlvms.pod [.lib.pods]perlvos.pod [.lib.pods]perlwin32.pod [.lib.pods]perlxs.pod
+pod27 = [.lib.pods]perlxstut.pod
+pod = $(pod0) $(pod1) $(pod2) $(pod3) $(pod4) $(pod5) $(pod6) $(pod7) $(pod8) $(pod9) $(pod10) $(pod11) $(pod12) $(pod13) $(pod14) $(pod15) $(pod16) $(pod17) $(pod18) $(pod19) $(pod20) $(pod21) $(pod22) $(pod23) $(pod24) $(pod25) $(pod26) $(pod27)
 
 # Would be useful to automate the generation of this rule from pod/buildtoc
 # Plus its corresponding delete in the clean target.
@@ -1254,6 +1255,10 @@ preplibrary : $(MINIPERL_EXE) $(LIBPREREQ)
        @ If F$Search("[.lib]pods.dir").eqs."" Then Create/Directory [.lib.pods]
        Copy/NoConfirm/Log $(MMS$SOURCE) [.lib.pods]
 
+[.lib.pods]perlunitut.pod : [.pod]perlunitut.pod
+       @ If F$Search("[.lib]pods.dir").eqs."" Then Create/Directory [.lib.pods]
+       Copy/NoConfirm/Log $(MMS$SOURCE) [.lib.pods]
+
 [.lib.pods]perlutil.pod : [.pod]perlutil.pod
        @ If F$Search("[.lib]pods.dir").eqs."" Then Create/Directory [.lib.pods]
        Copy/NoConfirm/Log $(MMS$SOURCE) [.lib.pods]
diff --git a/win32/pod.mak b/win32/pod.mak

index 1993d71..5f3bf61 100644 (file)
--- a/win32/pod.mak
+++ b/win32/pod.mak
@@ -120,6 +120,7 @@ POD = \
        perltrap.pod    \
        perlunicode.pod \
        perluniintro.pod        \
+       perlunitut.pod  \
        perlutil.pod    \
        perlvar.pod     \
        perlxs.pod      \
MANIFEST		patch \| blob \| blame \| history
pod.lst		patch \| blob \| blame \| history
pod/perl.pod		patch \| blob \| blame \| history
pod/perltoc.pod		patch \| blob \| blame \| history
pod/perlunitut.pod	[new file with mode: 0644]	patch \| blob
vms/descrip_mms.template		patch \| blob \| blame \| history
win32/pod.mak		patch \| blob \| blame \| history