From: Nicholas Clark Date: Sun, 24 Jun 2007 15:46:40 +0000 (+0000) Subject: s/\bunicode\b/Unicode/; # For everything not dual life X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=38a44b824c7566670d69f5e214106e1866ce72fe;p=p5sagit%2Fp5-mst-13.2.git s/\bunicode\b/Unicode/; # For everything not dual life p4raw-id: //depot/perl@31455 --- diff --git a/dump.c b/dump.c index 212f720..14e3c48 100644 --- a/dump.c +++ b/dump.c @@ -175,9 +175,9 @@ will also be escaped. Normally the SV will be cleared before the escaped string is prepared, but when PERL_PV_ESCAPE_NOCLEAR is set this will not occur. -If PERL_PV_ESCAPE_UNI is set then the input string is treated as unicode, +If PERL_PV_ESCAPE_UNI is set then the input string is treated as Unicode, if PERL_PV_ESCAPE_UNI_DETECT is set then the input string is scanned -using C<is_utf8_string()> to determine if it is unicode. +using C<is_utf8_string()> to determine if it is Unicode. If PERL_PV_ESCAPE_ALL is set then all input chars will be output using C<\x01F1> style escapes, otherwise only chars above 255 will be @@ -214,7 +214,7 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str, STRLEN wrote = 0; /* chars written so far */ STRLEN chsize = 0; /* size of data to be written */ STRLEN readsize = 1; /* size of data just read */ - bool isuni= flags & PERL_PV_ESCAPE_UNI ? 
1 : 0; /* is this Unicode */ const char *pv = str; const char * const end = pv + count; /* end of string */ octbuf[0] = esc; diff --git a/ext/Data/Dumper/Dumper.pm b/ext/Data/Dumper/Dumper.pm index 1c1fb6d..19ac1c4 100644 --- a/ext/Data/Dumper/Dumper.pm +++ b/ext/Data/Dumper/Dumper.pm @@ -9,7 +9,7 @@ package Data::Dumper; -$VERSION = '2.121_12'; +$VERSION = '2.121_13'; #$| = 1; @@ -498,7 +498,7 @@ sub _dump { } else { # string if ($s->{useqq} or $val =~ tr/\0-\377//c) { - # Fall back to qq if there's unicode + # Fall back to qq if there's Unicode $out .= qquote($val, $s->{useqq}); } else { diff --git a/pod/perlapi.pod b/pod/perlapi.pod index e4ae012..172a51a 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -848,9 +848,9 @@ will also be escaped. Normally the SV will be cleared before the escaped string is prepared, but when PERL_PV_ESCAPE_NOCLEAR is set this will not occur. -If PERL_PV_ESCAPE_UNI is set then the input string is treated as unicode, +If PERL_PV_ESCAPE_UNI is set then the input string is treated as Unicode, if PERL_PV_ESCAPE_UNI_DETECT is set then the input string is scanned -using C<is_utf8_string()> to determine if it is unicode. +using C<is_utf8_string()> to determine if it is Unicode. If PERL_PV_ESCAPE_ALL is set then all input chars will be output using C<\x01F1> style escapes, otherwise only chars above 255 will be @@ -6771,7 +6771,7 @@ Found in file utf8.c X<utf8n_to_uvuni> Bottom level UTF-8 decode routine. -Returns the unicode code point value of the first character in the string C<s> +Returns the Unicode code point value of the first character in the string C<s> which is assumed to be in UTF-8 encoding and no longer than C<curlen>; C<retlen> will be set to the length, in bytes, of that character. diff --git a/pod/perldiag.pod b/pod/perldiag.pod index e118220..d0cedfc 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1881,7 +1881,7 @@ of Perl are likely to eliminate these arbitrary limitations. 
=item Ignoring %s in character class in regex; marked by <-- HERE in m/%s/ -(W) Named unicode character escapes (\N{...}) may return multi-char +(W) Named Unicode character escapes (\N{...}) may return multi-char or zero length sequences. When such an escape is used in a character class its behaviour is not well defined. Check that the correct escape has been used, and the correct charname handler is in scope. diff --git a/pod/perlhack.pod b/pod/perlhack.pod index 2ded7f9..d270784 100644 --- a/pod/perlhack.pod +++ b/pod/perlhack.pod @@ -1925,7 +1925,7 @@ we can write the more sensible (see L for a full explanation of is() and other testing functions). is( "1.20.300.4000", sprintf "%vd", pack("U*",1,20,300,4000), - "U* produces unicode" ); + "U* produces Unicode" ); Now we'll test that we got that space-at-the-beginning business right: @@ -1936,7 +1936,7 @@ And finally we'll test that we don't make Unicode strings if C is B the first active format: isnt( v1.20.300.4000, sprintf "%vd", pack("C0U*",1,20,300,4000), - "U* not first isn't unicode" ); + "U* not first isn't Unicode" ); Mustn't forget to change the number of tests which appears at the top, or else the automated tester will get confused. This will either look diff --git a/pod/perlpacktut.pod b/pod/perlpacktut.pod index 7c52d64..73b2f43 100644 --- a/pod/perlpacktut.pod +++ b/pod/perlpacktut.pod @@ -660,7 +660,7 @@ Usually you'll want to pack or unpack UTF-8 strings: Please note: in the general case, you're better off using Encode::decode_utf8 to decode a UTF-8 encoded byte string to a Perl -unicode string, and Encode::encode_utf8 to encode a Perl unicode string +Unicode string, and Encode::encode_utf8 to encode a Perl Unicode string to UTF-8 bytes. These functions provide means of handling invalid byte sequences and generally have a friendlier interface. 
diff --git a/pod/perlre.pod b/pod/perlre.pod index 7571729..0f9ded3 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -261,7 +261,7 @@ X X X X optionally be wrapped in curly brackets for safer parsing. \g{name} Named backreference \k Named backreference - \N{name} Named unicode character, or unicode escape + \N{name} Named Unicode character, or Unicode escape \x12 Hexadecimal escape sequence \x{1234} Long hexadecimal escape sequence \K Keep the stuff left of the \K, don't include it in $& diff --git a/pod/perlretut.pod b/pod/perlretut.pod index da3e82c..360ee73 100644 --- a/pod/perlretut.pod +++ b/pod/perlretut.pod @@ -1841,7 +1841,7 @@ substituted. With the advent of 5.6.0, Perl regexps can handle more than just the standard ASCII character set. Perl now supports I, a standard for representing the alphabets from virtually all of the world's written -languages, and a host of symbols. Perl's text strings are unicode strings, so +languages, and a host of symbols. Perl's text strings are Unicode strings, so they can contain characters with a value (codepoint or character number) higher than 255 @@ -1890,7 +1890,7 @@ A list of full names is found in the file NamesList.txt in the lib/perl5/X.X.X/unicore directory (where X.X.X is the perl version number as it is installed on your system). -The answer to requirement 2), as of 5.6.0, is that a regexp uses unicode +The answer to requirement 2), as of 5.6.0, is that a regexp uses Unicode characters. Internally, this is encoded to bytes using either UTF-8 or a native 8 bit encoding, depending on the history of the string, but conceptually it is a sequence of characters, not bytes. See diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index c913047..a6f748e 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -53,7 +53,7 @@ ISO 8859-1 or other eight-bit encodings.) 
=item C needed to upgrade non-Latin-1 byte strings -By default, there is a fundamental asymmetry in Perl's unicode model: +By default, there is a fundamental asymmetry in Perl's Unicode model: implicit upgrading from byte strings to Unicode strings assumes that they were encoded in I, but Unicode strings are downgraded with UTF-8 encoding. This happens because the first 256 diff --git a/regcomp.c b/regcomp.c index bfa2c2e..5df69d4 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1356,7 +1356,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs have unique chars. We use an array of integers to represent the character codes 0..255 - (trie->charmap) and we use a an HV* to store unicode characters. We use the + (trie->charmap) and we use a an HV* to store Unicode characters. We use the native representation of the character value as the key and IV's for the coded index. @@ -4185,7 +4185,7 @@ redo_first_pass: return(NULL); } if (RExC_utf8 && !RExC_orig_utf8) { - /* It's possible to write a regexp in ascii that represents unicode + /* It's possible to write a regexp in ascii that represents Unicode codepoints outside of the byte range, such as via \x{100}. If we detect such a sequence we have to convert the entire pattern to utf8 and then recompile, as our sizing calculation will have been based @@ -6438,7 +6438,7 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep) /* RExC_parse points at the beginning brace, endbrace points at the last */ if ( name[0]=='U' && name[1]=='+' ) { - /* its a "unicode hex" notation {U+89AB} */ + /* its a "Unicode hex" notation {U+89AB} */ I32 fl = PERL_SCAN_ALLOW_UNDERSCORES | PERL_SCAN_DISALLOW_PREFIX | (SIZE_ONLY ? 
PERL_SCAN_SILENT_ILLDIGIT : 0); @@ -7147,7 +7147,7 @@ tryagain: case 'h': case 'H': /* HORIZWS */ case 'k': case 'K': /* named backref, keep marker */ case 'N': /* named char sequence */ - case 'p': case 'P': /* unicode property */ + case 'p': case 'P': /* Unicode property */ case 'R': /* LNBREAK */ case 's': case 'S': /* space class */ case 'v': case 'V': /* VERTWS */ diff --git a/regcomp.h b/regcomp.h index fae3386..8dbeaf1 100644 --- a/regcomp.h +++ b/regcomp.h @@ -492,7 +492,7 @@ END_EXTERN_C * n - Root of op tree for (?{EVAL}) item * o - Start op for (?{EVAL}) item * p - Pad for (?{EVAL}) item - * s - swash for unicode-style character class, and the multicharacter + * s - swash for Unicode-style character class, and the multicharacter * strings resulting from casefolding the single-character entries * in the character class * t - trie struct diff --git a/regexec.c b/regexec.c index a4c5aee..709eef2 100644 --- a/regexec.c +++ b/regexec.c @@ -1481,8 +1481,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, U8 **points; /* map of where we were in the input string when reading a given char. For ASCII this is unnecessary overhead as the relationship - is always 1:1, but for unicode, especially - case folded unicode this is not true. */ + is always 1:1, but for Unicode, especially + case folded Unicode this is not true. */ U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ]; U8 *bitmap=NULL; diff --git a/utf8.c b/utf8.c index f5e8649..a761d82 100644 --- a/utf8.c +++ b/utf8.c @@ -378,7 +378,7 @@ Perl_is_utf8_string_loclen(pTHX_ const U8 *s, STRLEN len, const U8 **ep, STRLEN =for apidoc A|UV|utf8n_to_uvuni|const U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags Bottom level UTF-8 decode routine. -Returns the unicode code point value of the first character in the string C<s> +Returns the Unicode code point value of the first character in the string C<s> which is assumed to be in UTF-8 encoding and no longer than C<curlen>; C<retlen> will be set to the length, in bytes, of that character. 
diff --git a/vms/vms.c b/vms/vms.c index f361595..43a9708 100644 --- a/vms/vms.c +++ b/vms/vms.c @@ -419,7 +419,7 @@ int utf8_flag; } } - /* High bit set, but not a unicode character! */ + /* High bit set, but not a Unicode character! */ /* Non printing DECMCS or ISO Latin-1 character? */ if (*inspec <= 0x9F) { @@ -6110,7 +6110,7 @@ static char *mp_do_tounixspec(pTHX_ const char *spec, char *buf, int ts, int * u } if ((*cp2 == '^')) { /* EFS file escape, pass the next character as is */ - /* Fix me: HEX encoding for UNICODE not implemented */ + /* Fix me: HEX encoding for Unicode not implemented */ cp2++; } else if ( *cp2 == '.') { @@ -6125,7 +6125,7 @@ static char *mp_do_tounixspec(pTHX_ const char *spec, char *buf, int ts, int * u for (; cp2 <= dirend; cp2++) { if ((*cp2 == '^')) { /* EFS file escape, pass the next character as is */ - /* Fix me: HEX encoding for UNICODE not implemented */ + /* Fix me: HEX encoding for Unicode not implemented */ *(cp1++) = *(++cp2); /* An escaped dot stays as is -- don't convert to slash */ if (*cp2 == '.') cp2++; @@ -9021,7 +9021,7 @@ Perl_readdir(pTHX_ DIR *dd) if (dd->flags & PERL_VMSDIR_M_UNIXSPECS) { /* Translate the encoded characters. 
*/ - /* Fixme: unicode handling could result in embedded 0 characters */ + /* Fixme: Unicode handling could result in embedded 0 characters */ if (strchr(dd->entry.d_name, '^') != NULL) { char new_name[256]; char * q; @@ -9035,7 +9035,7 @@ Perl_readdir(pTHX_ DIR *dd) /* fix-me */ /* if outchars_added > 1, then this is a wide file specification */ /* Wide file specifications need to be passed in Perl */ - /* counted strings apparently with a unicode flag */ + /* counted strings apparently with a Unicode flag */ } *q = 0; strcpy(dd->entry.d_name, new_name); @@ -12408,7 +12408,7 @@ static int set_features vms_debug_on_exception = 0; } - /* Create VTF-7 filenames from UNICODE instead of UTF-8 */ + /* Create VTF-7 filenames from Unicode instead of UTF-8 */ vms_vtf7_filenames = 0; status = sys_trnlnm("PERL_VMS_VTF7_FILENAMES", val_str, sizeof(val_str)); if ($VMS_STATUS_SUCCESS(status)) {