X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2Fcharnames.pm;h=9f9526b9654f5c441afb36b58ea98324572dbfae;hb=ff504b36b0f6467f64b463fd17fb34f640855abc;hp=ef1472c31d98356408ca87905adfeb3db6629177;hpb=9b5be9b5aa498a302ab752a0d1cdb335a620ede2;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/charnames.pm b/lib/charnames.pm index ef1472c..9f9526b 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -2,7 +2,7 @@ package charnames; use strict; use warnings; use File::Spec; -our $VERSION = '1.05'; +our $VERSION = '1.06'; use bytes (); # for $bytes::hint_bits @@ -167,7 +167,7 @@ sub charnames ## we know where it starts, so turn into number - ## the ordinal for the char. - $ord = hex substr($txt, $hexstart, $off[0] - $hexstart); + $ord = CORE::hex substr($txt, $hexstart, $off[0] - $hexstart); } if ($^H & $bytes::hint_bits) { # "use bytes" in effect? @@ -294,7 +294,7 @@ sub vianame my $arg = shift; - return chr hex $1 if $arg =~ /^U\+([0-9a-fA-F]+)$/; + return chr CORE::hex $1 if $arg =~ /^U\+([0-9a-fA-F]+)$/; return $vianame{$arg} if exists $vianame{$arg}; @@ -304,7 +304,7 @@ sub vianame if ($[ <= $pos) { my $posLF = rindex $txt, "\n", $pos; (my $code = substr $txt, $posLF + 1, 6) =~ tr/\t//d; - return $vianame{$arg} = hex $code; + return $vianame{$arg} = CORE::hex $code; # If $pos is at the 1st line, $posLF must be $[ - 1 (not found); # then $posLF + 1 equals to $[ (at the beginning of $txt). @@ -384,33 +384,55 @@ U+0084, and U+0099 do not have names even in ISO 6429. Since the Unicode standard uses "U+HHHH", so can you: "\N{U+263a}" is the Unicode smiley face, or "\N{WHITE SMILING FACE}". -=head1 CUSTOM TRANSLATORS +=head1 ALIASES -The mechanism of translation of C<\N{...}> escapes is general and not -hardwired into F<charnames.pm>.
A module can install custom -translations (inside the scope which C<use>s the module) with the -following magic incantation: +A few aliases have been defined for convenience: instead of having +to use the official names - sub import { - shift; - $^H{charnames} = \&translator; - } + LINE FEED (LF) + FORM FEED (FF) + CARRIAGE RETURN (CR) + NEXT LINE (NEL) -Here translator() is a subroutine which takes C<CHARNAME> as an -argument, and returns text to insert into the string instead of the -C<\N{CHARNAME}> escape. Since the text to insert should be different -in C<bytes> mode and out of it, the function should check the current -state of C<bytes>-flag as in: +(yes, with parentheses) one can use - use bytes (); # for $bytes::hint_bits - sub translator { - if ($^H & $bytes::hint_bits) { - return bytes_translator(@_); - } - else { - return utf8_translator(@_); - } - } + LINE FEED + FORM FEED + CARRIAGE RETURN + NEXT LINE + LF + FF + CR + NEL + +One can also use + + BYTE ORDER MARK + BOM + +and + + ZWNJ + ZWJ + +for ZERO WIDTH NON-JOINER and ZERO WIDTH JOINER. + +For backward compatibility one can use the old names for +certain C0 and C1 controls + + old new + + HORIZONTAL TABULATION CHARACTER TABULATION + VERTICAL TABULATION LINE TABULATION + FILE SEPARATOR INFORMATION SEPARATOR FOUR + GROUP SEPARATOR INFORMATION SEPARATOR THREE + RECORD SEPARATOR INFORMATION SEPARATOR TWO + UNIT SEPARATOR INFORMATION SEPARATOR ONE + PARTIAL LINE DOWN PARTIAL LINE FORWARD + PARTIAL LINE UP PARTIAL LINE BACKWARD + +but the old names in addition to giving the character +will also give a warning about being deprecated. =head1 CUSTOM ALIASES @@ -480,55 +502,33 @@ Returns undef if the name is unknown. This works only for the standard names, and does not yet apply to custom translators.
-=head1 ALIASES - -A few aliases have been defined for convenience: instead of having -to use the official names - - LINE FEED (LF) - FORM FEED (FF) - CARRIAGE RETURN (CR) - NEXT LINE (NEL) - -(yes, with parentheses) one can use - - LINE FEED - FORM FEED - CARRIAGE RETURN - NEXT LINE - LF - FF - CR - NEL - -One can also use - - BYTE ORDER MARK - BOM - -and - - ZWNJ - ZWJ - -for ZERO WIDTH NON-JOINER and ZERO WIDTH JOINER. +=head1 CUSTOM TRANSLATORS -For backward compatibility one can use the old names for -certain C0 and C1 controls +The mechanism of translation of C<\N{...}> escapes is general and not +hardwired into F<charnames.pm>. A module can install custom +translations (inside the scope which C<use>s the module) with the +following magic incantation: - old new + sub import { + shift; + $^H{charnames} = \&translator; + } - HORIZONTAL TABULATION CHARACTER TABULATION - VERTICAL TABULATION LINE TABULATION - FILE SEPARATOR INFORMATION SEPARATOR FOUR - GROUP SEPARATOR INFORMATION SEPARATOR THREE - RECORD SEPARATOR INFORMATION SEPARATOR TWO - UNIT SEPARATOR INFORMATION SEPARATOR ONE - PARTIAL LINE DOWN PARTIAL LINE FORWARD - PARTIAL LINE UP PARTIAL LINE BACKWARD +Here translator() is a subroutine which takes C<CHARNAME> as an +argument, and returns text to insert into the string instead of the +C<\N{CHARNAME}> escape. Since the text to insert should be different +in C<bytes> mode and out of it, the function should check the current +state of C<bytes>-flag as in: -but the old names in addition to giving the character -will also give a warning about being deprecated. + use bytes (); # for $bytes::hint_bits + sub translator { + if ($^H & $bytes::hint_bits) { + return bytes_translator(@_); + } + else { + return utf8_translator(@_); + } + } =head1 ILLEGAL CHARACTERS