Allow U+0FFFF in regex

[p5sagit/p5-mst-13.2.git] / lib / unicore / mktables
diff --git a/lib/unicore/mktables b/lib/unicore/mktables

index b268623..c61a3f4 100644 (file)
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -4,7 +4,10 @@
 # Any files created or read by this program should be listed in 'mktables.lst'
 # Use -makelist to regenerate it.
 
-require 5.008;        # Needs pack "U". Probably safest to run on 5.8.x
+# Needs 'no overloading' to run faster on miniperl.  Code commented out at the
+# subroutine objaddr can be used instead to work as far back (untested) as
+# 5.8: needs pack "U".
+require 5.010_001;
 use strict;
 use warnings;
 use Carp;
@@ -295,6 +298,11 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
 # string, but it is a contributory property, and therefore not output by
 # default.
 #
+# DEBUGGING
+#
+# XXX Add more stuff here.   use perl instead of miniperl to find problems with
+# Scalar::Util
+
 # FUTURE ISSUES
 #
 # The program would break if Unicode were to change its names so that
@@ -453,17 +461,8 @@ our $to_trace = 0;
             }
         }
 
-        if ($print_caller) {
-            if (defined $line_number) {
-                    print STDERR sprintf "%4d: ", $line_number;
-            }
-            else {
-                    print STDERR "     ";
-            }
-            $caller_name .= ": ";
-            print STDERR $caller_name;
-        }
-
+        print STDERR sprintf "%4d: ", $line_number if defined $line_number;
+        print STDERR "$caller_name: " if $print_caller;
         print STDERR $output, "\n";
         return;
     }
@@ -549,10 +548,9 @@ while (@ARGV) {
     elsif ($arg eq '-P' && defined ($pod_directory = shift)) {
         -d $pod_directory or croak "Directory '$pod_directory' doesn't exist";
     }
-    elsif ($arg eq '-maketest'
-             || ($arg eq '-T' && defined ($t_path = shift))) {
+    elsif ($arg eq '-maketest' || ($arg eq '-T' && defined ($t_path = shift)))
+    {
         $make_test_script = 1;
-        $t_path = 'TestProp.pl' unless defined $t_path;
     }
     elsif ($arg eq '-makelist') {
         $make_list = 1;
@@ -588,11 +586,12 @@ usage: $0 [-c|-p|-q|-v|-w] [-C dir] [-L filelist] [ -P pod_dir ]
                 except those specified by the -P and -T options will be done
                 with respect to this directory.
   -P dir      : Output $pod_file file to directory 'dir'.
-  -T path     : Create a .t test file as 'path'
+  -T path     : Create a test script as 'path'; overrides -maketest
   -L filelist : Use alternate 'filelist' instead of standard one
   -globlist   : Take as input all non-Test *.txt files in current and sub
                 directories
-  -maketest   : Make test script
+  -maketest   : Make test script 'TestProp.pl' in current (or -C directory),
+                overrides -T
   -makelist   : Rewrite the file list $file_list based on current setup
   -check A B  : Executes $0 only if A and B are the same
 END
@@ -605,25 +604,17 @@ my $youngest = -M $0;   # Do this before the chdir!
 
 # Change directories now, because need to read 'version' early.
 if ($use_directory) {
-    if ($pod_directory
-        && ! File::Spec->file_name_is_absolute($pod_directory))
-    {
+    if ($pod_directory && ! File::Spec->file_name_is_absolute($pod_directory)) {
         $pod_directory = File::Spec->rel2abs($pod_directory);
     }
-    if ($t_path
-        && ! File::Spec->file_name_is_absolute($t_path))
-    {
+    if ($t_path && ! File::Spec->file_name_is_absolute($t_path)) {
         $t_path = File::Spec->rel2abs($t_path);
     }
     chdir $use_directory or croak "Failed to chdir to '$use_directory':$!";
-    if ($pod_directory
-        && File::Spec->file_name_is_absolute($pod_directory))
-    {
+    if ($pod_directory && File::Spec->file_name_is_absolute($pod_directory)) {
         $pod_directory = File::Spec->abs2rel($pod_directory);
     }
-    if ($t_path
-        && File::Spec->file_name_is_absolute($t_path))
-    {
+    if ($t_path && File::Spec->file_name_is_absolute($t_path)) {
         $t_path = File::Spec->abs2rel($t_path);
     }
 }
@@ -688,7 +679,7 @@ if ($v_version gt v3.2.0) {
 # unless explicitly added.
 if ($v_version ge v5.2.0) {
     my $unihan = 'Unihan; remove from list if using Unihan';
-    foreach my $table qw ( 
+    foreach my $table qw (
                            kAccountingNumeric
                            kOtherNumeric
                            kPrimaryNumeric
@@ -915,8 +906,8 @@ my %ignored_files = (
 
 my $HEADER=<<"EOF";
 # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!!
-# This file is machine-generated by $0 from the Unicode database,
-# Version $string_version.  Any changes made here will be lost!
+# This file is machine-generated by $0 from the Unicode
+# database, Version $string_version.  Any changes made here will be lost!
 EOF
 
 my $INTERNAL_ONLY=<<"EOF";
@@ -932,7 +923,7 @@ my $DEVELOPMENT_ONLY=<<"EOF";
 # This file contains information artificially constrained to code points
 # present in Unicode release $string_compare_versions.
 # IT CANNOT BE RELIED ON.  It is for use during development only and should
-# not be used for production.  
+# not be used for production.
 
 EOF
 
@@ -980,7 +971,7 @@ my $CP_IN_NAME = 3;         # The NAME contains the code point appended to it.
 my $NULL = 4;               # The map is to the null string; utf8.c can't
                             # handle these, nor is there an accepted syntax
                             # for them in \p{} constructs
-my $COMPUTE_NO_MULTI_CP = 5; # Pseudo-type; meanst that ranges that would
+my $COMPUTE_NO_MULTI_CP = 5; # Pseudo-type; means that ranges that would
                              # otherwise be $MULTI_CP type are instead type 0
 
 # process_generic_property_file() can accept certain overrides in its input.
@@ -1007,6 +998,7 @@ my $MULTIPLE = 4;          # Don't replace, but add a duplicate record if
 my $NORMAL = "";
 my $SUPPRESSED = 'z';   # The character should never actually be seen, since
                         # it is suppressed
+my $PLACEHOLDER = 'P';  # Implies no pod entry generated
 my $DEPRECATED = 'D';
 my $a_bold_deprecated = "a 'B<$DEPRECATED>'";
 my $A_bold_deprecated = "A 'B<$DEPRECATED>'";
@@ -1028,7 +1020,7 @@ my %status_past_participles = (
     $SUPPRESSED => 'should never be generated',
     $STABILIZED => 'stabilized',
     $OBSOLETE => 'obsolete',
-    $DEPRECATED => 'deprecated'
+    $DEPRECATED => 'deprecated',
 );
 
 # The format of the values of the map tables:
@@ -1083,9 +1075,10 @@ my %Jamo_L;     # Leading consonants
 my %Jamo_V;     # Vowels
 my %Jamo_T;     # Trailing consonants
 
+my @backslash_X_tests;     # List of tests read in for testing \X
 my @unhandled_properties;  # Will contain a list of properties found in
                            # the input that we didn't process.
-my @match_properties;      # properties that have match tables, to be
+my @match_properties;      # Properties that have match tables, to be
                            # listed in the pod
 my @map_properties;        # Properties that get map files written
 my @named_sequences;       # NamedSequences.txt contents.
@@ -1126,34 +1119,47 @@ sub file_exists ($) {   # platform independent '-e'.  This program internally
     return -e internal_file_to_platform($file);
 }
 
-# This 'require' doesn't necessarily work in miniperl, and even if it does,
-# the native perl version of it (which is what would operate under miniperl)
-# is extremely slow, as it does a string eval every call.
-my $has_fast_scalar_util = $\18 !~ /miniperl/
-                            && defined eval "require Scalar::Util";
-
 sub objaddr($) {
-    # Returns the address of the blessed input object.  Uses the XS version if
-    # available.  It doesn't check for blessedness because that would do a
-    # string eval every call, and the program is structured so that this is
-    # never called for a non-blessed object.
-
-    return Scalar::Util::refaddr($_[0]) if $has_fast_scalar_util;
-
-    # Check at least that is a ref.
-    my $pkg = ref($_[0]) or return undef;
+    # Returns the address of the blessed input object.
+    # It doesn't check for blessedness because that would do a string eval
+    # every call, and the program is structured so that this is never called
+    # for a non-blessed object.
 
-    # Change to a fake package to defeat any overloaded stringify
-    bless $_[0], 'main::Fake';
+    no overloading; # If overloaded, numifying below won't work.
 
     # Numifying a ref gives its address.
-    my $addr = 0 + $_[0];
-
-    # Return to original class
-    bless $_[0], $pkg;
-    return $addr;
+    return 0 + $_[0];
 }
 
+# Commented code below should work on Perl 5.8.
+## This 'require' doesn't necessarily work in miniperl, and even if it does,
+## the native perl version of it (which is what would operate under miniperl)
+## is extremely slow, as it does a string eval every call.
+#my $has_fast_scalar_util = $\18 !~ /miniperl/
+#                            && defined eval "require Scalar::Util";
+#
+#sub objaddr($) {
+#    # Returns the address of the blessed input object.  Uses the XS version if
+#    # available.  It doesn't check for blessedness because that would do a
+#    # string eval every call, and the program is structured so that this is
+#    # never called for a non-blessed object.
+#
+#    return Scalar::Util::refaddr($_[0]) if $has_fast_scalar_util;
+#
+#    # Check at least that is a ref.
+#    my $pkg = ref($_[0]) or return undef;
+#
+#    # Change to a fake package to defeat any overloaded stringify
+#    bless $_[0], 'main::Fake';
+#
+#    # Numifying a ref gives its address.
+#    my $addr = 0 + $_[0];
+#
+#    # Return to original class
+#    bless $_[0], $pkg;
+#    return $addr;
+#}
+
 sub max ($$) {
     my $a = shift;
     my $b = shift;
@@ -1193,6 +1199,11 @@ package Carp;
 
 our $Verbose = 1 if main::DEBUG;  # Useful info when debugging
 
+# This is a work-around suggested by Nicholas Clark to fix a problem with Carp
+# and overload trying to load Scalar:Util under miniperl.  See
+# http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2009-11/msg01057.html
+undef $overload::VERSION;
+
 sub my_carp {
     my $message = shift || "";
     my $nofold = shift || 0;
@@ -1465,9 +1476,8 @@ package main;
                     no strict "refs";
                     *$subname = sub {
                         use strict "refs";
-                        my $self = shift;
-                        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
-                        my $addr = main::objaddr $self;
+                        Carp::carp_extra_args(\@_) if main::DEBUG && @_ > 1;
+                        my $addr = main::objaddr $_[0];
                         if (ref $field->{$addr} ne 'ARRAY') {
                             my $type = ref $field->{$addr};
                             $type = 'scalar' unless $type;
@@ -1488,9 +1498,8 @@ package main;
                     no strict "refs";
                     *$subname = sub {
                         use strict "refs";
-                        my $self = shift;
-                        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
-                        return $field->{main::objaddr $self};
+                        Carp::carp_extra_args(\@_) if main::DEBUG && @_ > 1;
+                        return $field->{main::objaddr $_[0]};
                     }
                 }
             }
@@ -1499,11 +1508,12 @@ package main;
                 no strict "refs";
                 *$subname = sub {
                     use strict "refs";
-                    return Carp::carp_too_few_args(\@_, 2) if main::DEBUG && @_ < 2;
-                    my $self = shift;
-                    my $value = shift;
-                    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
-                    $field->{main::objaddr $self} = $value;
+                    if (main::DEBUG) {
+                        return Carp::carp_too_few_args(\@_, 2) if @_ < 2;
+                        Carp::carp_extra_args(\@_) if @_ > 2;
+                    }
+                    # $self is $_[0]; $value is $_[1]
+                    $field->{main::objaddr $_[0]} = $_[1];
                     return;
                 }
             }
@@ -1563,7 +1573,6 @@ package Input_file;
 
 sub trace { return main::trace(@_); }
 
-
 { # Closure
     # Keep track of fields that are to be put into the constructor.
     my %constructor_fields;
@@ -1597,6 +1606,15 @@ sub trace { return main::trace(@_); }
     # processed when you set the $debug_skip global.
     main::set_access('non_skip', \%non_skip, 'c');
 
+    my %skip;
+    # This is used to skip processing of this input file semi-permanently.
+    # It is used for files that we aren't planning to process anytime soon,
+    # but want to allow to be in the directory and not raise a message that we
+    # are not handling.  Mostly for test files.  This is in contrast to the
+    # non_skip element, which is supposed to be used very temporarily for
+    # debugging.  Sets 'optional' to 1
+    main::set_access('skip', \%skip, 'c');
+
     my %each_line_handler;
     # list of subroutines to look at and filter each non-comment line in the
     # file.  defaults to none.  The subroutines are called in order, each is
@@ -1660,6 +1678,7 @@ sub trace { return main::trace(@_); }
         # Set defaults
         $handler{$addr} = \&main::process_generic_property_file;
         $non_skip{$addr} = 0;
+        $skip{$addr} = 0;
         $has_missings_defaults{$addr} = $NO_DEFAULTS;
         $handle{$addr} = undef;
         $added_lines{$addr} = [ ];
@@ -1668,6 +1687,7 @@ sub trace { return main::trace(@_); }
         $missings{$addr} = [ ];
 
         # Two positional parameters.
+        return Carp::carp_too_few_args(\@_, 2) if main::DEBUG && @_ < 2;
         $file{$addr} = main::internal_file_to_platform(shift);
         $first_released{$addr} = shift;
 
@@ -1715,6 +1735,8 @@ sub trace { return main::trace(@_); }
             print "Warning: " . __PACKAGE__ . " constructor for $file{$addr} has useless 'non_skip' in it\n";
         }
 
+        $optional{$addr} = 1 if $skip{$addr};
+
         return $self;
     }
 
@@ -1763,7 +1785,7 @@ sub trace { return main::trace(@_); }
         }
 
         # File could be optional
-        if ($optional{$addr}){
+        if ($optional{$addr}) {
             return unless -e $file;
             my $result = eval $optional{$addr};
             if (! defined $result) {
@@ -1796,11 +1818,12 @@ sub trace { return main::trace(@_); }
         }
         else {
 
-            # Here, the file exists
+            # Here, the file exists.  Some platforms may change the case of
+            # its name
             if ($seen_non_extracted_non_age) {
-                if ($file =~ /$EXTRACTED/) {
+                if ($file =~ /$EXTRACTED/i) {
                     Carp::my_carp_bug(join_lines(<<END
-$file should be processed just after the 'Prop..Alias' files, and before
+$file should be processed just after the 'Prop...Alias' files, and before
 anything not in the $EXTRACTED_DIR directory.  Proceeding, but the results may
 have subtle problems
 END
@@ -1809,8 +1832,8 @@ END
             }
             elsif ($EXTRACTED_DIR
                     && $first_released{$addr} ne v0
-                    && $file !~ /$EXTRACTED/
-                    && $file ne 'DAge.txt')
+                    && $file !~ /$EXTRACTED/i
+                    && lc($file) ne 'dage.txt')
             {
                 # We don't set this (by the 'if' above) if we have no
                 # extracted directory, so if running on an early version,
@@ -1822,10 +1845,20 @@ END
             # isn't a file we are expecting.  As we process the files,
             # they are deleted from the hash, so any that remain at the
             # end of the program are files that we didn't process.
-            Carp::my_carp("Was not expecting '$file'.") if
-                    ! delete $potential_files{File::Spec->rel2abs($file)}
+            my $fkey = File::Spec->rel2abs($file);
+            my $expecting = delete $potential_files{$fkey};
+            $expecting = delete $potential_files{lc($fkey)} unless defined $expecting;
+            Carp::my_carp("Was not expecting '$file'.") if 
+                    ! $expecting                    
                     && ! defined $handle{$addr};
 
+            # Having deleted from expected files, we can quit if not to do
+            # anything.  Don't print progress unless really want verbosity
+            if ($skip{$addr}) {
+                print "Skipping $file.\n" if $verbosity >= $VERBOSE;
+                return;
+            }
+
             # Open the file, converting the slashes used in this program
             # into the proper form for the OS
             my $file_handle;
@@ -3819,7 +3852,7 @@ sub trace { return main::trace(@_); }
         return $self->_add_delete('+', $start, $end, "");
     }
 
-    my $non_ASCII = (ord('A') == 65);   # Assumes test on same platform
+    my $non_ASCII = (ord('A') != 65);   # Assumes test on same platform
 
     sub is_code_point_usable {
         # This used only for making the test script.  See if the input
@@ -3835,14 +3868,12 @@ sub trace { return main::trace(@_); }
 
         # For non-ASCII, we shun the characters that don't have Perl encoding-
         # independent symbols for them.  'A' is such a symbol, so is "\n".
-        # Note, this program hopefully will work on 5.8 Perls, and \v is not
-        # such a symbol in them.
         return $try_hard if $non_ASCII
                             && $code <= 0xFF
                             && ($code >= 0x7F
                                 || ($code >= 0x0E && $code <= 0x1F)
                                 || ($code >= 0x01 && $code <= 0x06)
-                                || $code == 0x0B);  # \v introduced after 5.8
+                                || $code == 0x0B);
 
         # shun null.  I'm (khw) not sure why this was done, but NULL would be
         # the character very frequently used.
@@ -3976,6 +4007,10 @@ sub trace { return main::trace(@_); }
     main::set_access('nominal_short_name_length',
                     \%nominal_short_name_length);
 
+    my %complete_name;
+    # The complete name, including property.
+    main::set_access('complete_name', \%complete_name, 'r');
+
     my %property;
     # Parent property this table is attached to.
     main::set_access('property', \%property, 'r');
@@ -4057,8 +4092,9 @@ sub trace { return main::trace(@_); }
         $name{$addr} = delete $args{'Name'};
         $find_table_from_alias{$addr} = delete $args{'_Alias_Hash'};
         $full_name{$addr} = delete $args{'Full_Name'};
+        my $complete_name = $complete_name{$addr}
+                          = delete $args{'Complete_Name'};
         $internal_only{$addr} = delete $args{'Internal_Only_Warning'} || 0;
-        $perl_extension{$addr} = delete $args{'Perl_Extension'} || 0;
         $property{$addr} = delete $args{'_Property'};
         $range_list{$addr} = delete $args{'_Range_List'};
         $status{$addr} = delete $args{'Status'} || $NORMAL;
@@ -4070,6 +4106,7 @@ sub trace { return main::trace(@_); }
         my $loose_match = delete $args{'Fuzzy'};
         my $note = delete $args{'Note'};
         my $make_pod_entry = delete $args{'Pod_Entry'};
+        my $perl_extension = delete $args{'Perl_Extension'};
 
         # Shouldn't have any left over
         Carp::carp_extra_args(\%args) if main::DEBUG && %args;
@@ -4088,12 +4125,20 @@ sub trace { return main::trace(@_); }
         push @{$description{$addr}}, $description if $description;
         push @{$note{$addr}}, $note if $note;
 
-        # If hasn't set its status already, see if it is on one of the lists
-        # of properties or tables that have particular statuses; if not, is
-        # normal.  The lists are prioritized so the most serious ones are
-        # checked first
-        my $complete_name = $self->complete_name;
-        if (! $status{$addr}) {
+        if ($status{$addr} eq $PLACEHOLDER) {
+
+            # A placeholder table doesn't get documented, is a perl extension,
+            # and quite likely will be empty
+            $make_pod_entry = 0 if ! defined $make_pod_entry;
+            $perl_extension = 1 if ! defined $perl_extension;
+            push @tables_that_may_be_empty, $complete_name{$addr};
+        }
+        elsif (! $status{$addr}) {
+
+            # If hasn't set its status already, see if it is on one of the
+            # lists of properties or tables that have particular statuses; if
+            # not, is normal.  The lists are prioritized so the most serious
+            # ones are checked first
             if (exists $why_suppressed{$complete_name}) {
                 $status{$addr} = $SUPPRESSED;
             }
@@ -4129,6 +4174,8 @@ sub trace { return main::trace(@_); }
             }
         }
 
+        $perl_extension{$addr} = $perl_extension || 0;
+
         # By convention what typically gets printed only or first is what's
         # first in the list, so put the full name there for good output
         # clarity.  Other routines rely on the full name being first on the
@@ -4157,15 +4204,12 @@ sub trace { return main::trace(@_); }
     # class
     for my $sub qw(
                     append_to_body
-                    complete_name
                     pre_body
                 )
                 # append_to_body and pre_body are called in the write() method
                 # to add stuff after the main body of the table, but before
                 # its close; and to prepend stuff before the beginning of the
                 # table.
-                # complete_name returns the complete name of the property and
-                # table, like Script=Latin
     {
         no strict "refs";
         *$sub = sub {
@@ -4745,12 +4789,15 @@ sub trace { return main::trace(@_); }
         my $default_map = delete $args{'Default_Map'};
         my $format = delete $args{'Format'};
         my $property = delete $args{'_Property'};
+        my $full_name = delete $args{'Full_Name'};
         # Rest of parameters passed on
 
         my $range_list = Range_Map->new(Owner => $property);
 
         my $self = $class->SUPER::new(
                                     Name => $name,
+                                    Complete_Name =>  $full_name,
+                                    Full_Name => $full_name,
                                     _Property => $property,
                                     _Range_List => $range_list,
                                     %args);
@@ -4780,13 +4827,6 @@ sub trace { return main::trace(@_); }
         return "Map table for Property '$name'";
     }
 
-    sub complete_name {
-        # The complete name for a map table is just its full name, as that
-        # completely identifies the property it represents
-
-        return shift->full_name;
-    }
-
     sub add_alias {
         # Add a synonym for this table (which means the property itself)
         my $self = shift;
@@ -5009,8 +5049,6 @@ sub trace { return main::trace(@_); }
             # But for $STRING properties, must calculate now.  Subtract the
             # count from each range that maps to the default.
             foreach my $range ($self->_range_list->ranges) {
-        local $to_trace = 1 if main::DEBUG;
-        trace $self, $range;
                 if ($range->value eq $default_map) {
                     $count -= $range->end +1 - $range->start;
                 }
@@ -5717,6 +5755,10 @@ sub trace { return main::trace(@_); }
         # The property for which this table is a listing of property values.
         my $property = delete $args{'_Property'};
 
+        my $name = delete $args{'Name'};
+        my $full_name = delete $args{'Full_Name'};
+        $full_name = $name if ! defined $full_name;
+
         # Optional
         my $initialize = delete $args{'Initialize'};
         my $matches_all = delete $args{'Matches_All'} || 0;
@@ -5725,7 +5767,22 @@ sub trace { return main::trace(@_); }
         my $range_list = Range_List->new(Initialize => $initialize,
                                          Owner => $property);
 
+        my $complete = $full_name;
+        $complete = '""' if $complete eq "";  # A null name shouldn't happen,
+                                              # but this helps debug if it
+                                              # does
+        # The complete name for a match table includes it's property in a
+        # compound form 'property=table', except if the property is the
+        # pseudo-property, perl, in which case it is just the single form,
+        # 'table' (If you change the '=' must also change the ':' in lots of
+        # places in this program that assume an equal sign)
+        $complete = $property->full_name . "=$complete" if $property != $perl;
+        
+
         my $self = $class->SUPER::new(%args,
+                                      Name => $name,
+                                      Complete_Name => $complete,
+                                      Full_Name => $full_name,
                                       _Property => $property,
                                       _Range_List => $range_list,
                                       );
@@ -5805,7 +5862,7 @@ sub trace { return main::trace(@_); }
     sub _operator_stringify {
         my $self = shift;
 
-        my $name= $self->complete_name;
+        my $name = $self->complete_name;
         return "Table '$name'";
     }
 
@@ -6014,26 +6071,6 @@ sub trace { return main::trace(@_); }
         return $self->_range_list->add_range(@_);
     }
 
-    sub complete_name {
-        # The complete name for a match table includes it's property in a
-        # compound form 'property=table', except if the property is the
-        # pseudo-property, perl, in which case it is just the single form,
-        # 'table'
-
-        my $self = shift;
-        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
-
-        my $name = $self->full_name;
-        my $property = $self->property;
-        $name = '""' if $name eq "";  # A null name shouldn't happen, but this
-                                      # helps debug if it does
-        return $name if $property == $perl;
-
-        # (If change the '=' must also change the ':' in set_final_comment(),
-        # and the references to colon in its text)
-        return $property->full_name . '=' . $name;
-    }
-
     sub pre_body {  # Does nothing for match tables.
         return
     }
@@ -6198,7 +6235,17 @@ END
                     my $flag = $property->status
                                 || $table->status
                                 || $table_alias_object->status;
-                    $flags{$flag} = $status_past_participles{$flag} if $flag;
+                    if ($flag) {
+                        if ($flag ne $PLACEHOLDER) {
+                            $flags{$flag} = $status_past_participles{$flag};
+                        } else {
+                            $flags{$flag} = <<END;
+a placeholder because it is not in Version $string_version of Unicode, but is
+needed by the Perl core to work gracefully.  Because it is not in this version
+of Unicode, it will not be listed in $pod_file.pod
+END
+                        }
+                    }
 
                     $loose_count++;
 
@@ -6215,6 +6262,9 @@ END
                 push @note, $table->note;
                 push @conflicting, $table->conflicting;
 
+                # And this for output after all the tables.
+                push @global_comments, $table->comment;
+
                 # Compute an alternate compound name using the final property
                 # synonym and the first table synonym with a colon instead of
                 # the equal sign used elsewhere.
@@ -6300,8 +6350,10 @@ END
         if (%flags) {
             foreach my $flag (sort keys %flags) {
                 $comment .= <<END;
-'$flag' below means that this form is $flags{$flag}.  Consult $pod_file.pod
+'$flag' below means that this form is $flags{$flag}.
 END
+                next if $flag eq $PLACEHOLDER;
+                $comment .= "Consult $pod_file.pod\n";
             }
             $comment .= "\n";
         }
@@ -6311,7 +6363,7 @@ This file returns the $code_points in Unicode Version $string_version that
 $match$synonyms:
 
 $matches_comment
-$pod_file.pod should be consulted for the rules on using $any_of_these,
+$pod_file.pod should be consulted for the syntax rules for $any_of_these,
 including if adding or subtracting white space, underscore, and hyphen
 characters matters or doesn't matter, and other permissible syntactic
 variants.  Upper/lower case distinctions never matter.
@@ -6340,7 +6392,9 @@ END
 
         # And append any comment(s) from the actual tables.  They are all
         # gathered here, so may not read all that well.
-        $comment .= "\n" . join "\n\n", @global_comments if @global_comments;
+        if (@global_comments) {
+            $comment .= "\n" . join("\n\n", @global_comments) . "\n";
+        }
 
         if ($count) {   # The format differs if no code points, and needs no
                         # explanation in that case
@@ -8385,7 +8439,7 @@ END
         # Complications arise because any number of properties can be in the
         # file, in any order, interspersed in any way.  The first time a
         # property is seen, it gets information about that property and
-        # cache's it for quick retrieval later.  It also normalizes the maps
+        # caches it for quick retrieval later.  It also normalizes the maps
         # so that only one of many synonym is stored.  The Unicode input files
         # do use some multiple synonyms.
 
@@ -8641,8 +8695,8 @@ END
                         # Make sure there is no conflict between the two.
                         # $missings has priority.
                         if (ref $missings) {
-                            $default_table 
-                                = $property_object->table($default_map);
+                            $default_table
+                                        = $property_object->table($default_map);
                             if (! defined $default_table
                                 || $default_table != $missings)
                             {
@@ -8788,7 +8842,7 @@ END
     }
 }
 
-# Unused until revise charnames;
+# XXX Unused until revise charnames;
 #sub check_and_handle_compound_name {
 #    This looks at Name properties for parenthesized components and splits
 #    them off.  Thus it finds FF as an equivalent to Form Feed.
@@ -8916,7 +8970,7 @@ END
         Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
         my $Perl_decomp = Property->new('Perl_Decomposition_Mapping',
-                                        Directory => '.',
+                                        Directory => File::Spec->curdir(),
                                         File => 'Decomposition',
                                         Format => $STRING_FORMAT,
                                         Internal_Only_Warning => 1,
@@ -9497,6 +9551,18 @@ END
     }
 } # End closure for UnicodeData
 
+sub process_GCB_test {
+
+    my $file = shift;
+    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+    while ($file->next_line) {
+        push @backslash_X_tests, $_;
+    }
+        
+    return;
+}
+
 sub process_NamedSequences {
     # NamedSequences.txt entries are just added to an array.  Because these
     # don't look like the other tables, they have their own handler.
@@ -9683,8 +9749,7 @@ sub filter_special_casing_line {
 
 sub filter_old_style_case_folding {
     # This transforms $_ containing the case folding style of 3.0.1, to 3.1
-    # and later style, then calls the handler for the later style.  Different
-    # letters were used.
+    # and later style.  Different letters were used in the earlier.
 
     my $file = shift;
     Carp::carp_extra_args(\@_) if main::DEBUG && @_;
@@ -9862,13 +9927,6 @@ sub filter_jamo_line {
     return;
 }
 
-sub filter_numeric_value_comment_missing_line {
-    # Filters out the extra column in DNumValues.txt of this line
-
-    s/^($missing_defaults_prefix)\s*;\s*/$1/;
-    return;
-}
-
 sub register_fraction($) {
     # This registers the input rational number so that it can be passed on to
     # utf8_heavy.pl, both in rational and floating forms.
@@ -10796,19 +10854,79 @@ sub compile_perl() {
         $CanonDCIJ = $CanonDCIJ & $Assigned;
     }
 
-    # This is used in Unicode's definition of \X
+    # These are used in Unicode's definition of \X
+    my $begin = $perl->add_match_table('_X_Begin', Perl_Extension => 1);
+    my $extend = $perl->add_match_table('_X_Extend', Perl_Extension => 1);
+
     my $gcb = property_ref('Grapheme_Cluster_Break');
-    if (defined $gcb) {
-        my $extend = $perl->add_match_table('_GCB_Extend',
-                                        Initialize => $gcb->table('Extend'));
-        $extend += $gcb->table('SpacingMark')
-                                        if defined $gcb->table('SpacingMark');
+
+    # The 'extended' grapheme cluster came in 5.1.  The non-extended 
+    # definition differs too much from the traditional Perl one to use.
+    if (defined $gcb && defined $gcb->table('SpacingMark')) {
+
+        # Note that assumes HST is defined; it came in an earlier release than
+        # GCB.  In the line below, two negatives means: yes hangul
+        $begin += ~ property_ref('Hangul_Syllable_Type')
+                                                    ->table('Not_Applicable')
+               + ~ ($gcb->table('Control')
+                    + $gcb->table('CR')
+                    + $gcb->table('LF'));
+        $begin->add_comment('For use in \X; matches: Hangul_Syllable | ! Control');
+
+        $extend += $gcb->table('Extend') + $gcb->table('SpacingMark');
+        $extend->add_comment('For use in \X; matches: Extend | SpacingMark');
     }
     else {    # Old definition, used on early releases.
-        $perl->add_match_table('_X_Extend', Initialize => $gc->table('Mark')
-                                                            + 0x200C    # ZWNJ
-                                                            + 0x200D    # ZWJ
-        );
+        $extend += $gc->table('Mark')
+                + 0x200C    # ZWNJ
+                + 0x200D;   # ZWJ
+        $begin += ~ $extend;
+
+        # Here we may have a release that has the regular grapheme cluster
+        # defined, or a release that doesn't have anything defined.
+        # We set things up so the Perl core degrades gracefully, possibly with
+        # placeholders that match nothing.
+
+        if (! defined $gcb) {
+            $gcb = Property->new('GCB', Status => $PLACEHOLDER);
+        }
+        my $hst = property_ref('HST');
+        if (!defined $hst) {
+            $hst = Property->new('HST', Status => $PLACEHOLDER);
+            $hst->add_match_table('Not_Applicable',
+                                Initialize => $Any,
+                                Matches_All => 1);
+        }
+
+        # On some releases, here we may not have the needed tables for the
+        # perl core, in some releases we may.
+        foreach my $name (qw{ L LV LVT T V prepend }) {
+            my $table = $gcb->table($name);
+            if (! defined $table) {
+                $table = $gcb->add_match_table($name);
+                push @tables_that_may_be_empty, $table->complete_name;
+            }
+
+            # The HST property predates the GCB one, and has identical tables
+            # for some of them, so use it if we can.
+            if ($table->is_empty
+                && defined $hst
+                && defined $hst->table($name))
+            {
+                $table += $hst->table($name);
+            }
+        }
+    }
+
+    # More GCB.  If we found some hangul syllables, populate a combined
+    # table.
+    my $lv_lvt_v = $perl->add_match_table('_X_LV_LVT_V');
+    my $LV = $gcb->table('LV');
+    if ($LV->is_empty) {
+        push @tables_that_may_be_empty, $lv_lvt_v->complete_name;
+    } else {
+        $lv_lvt_v += $LV + $gcb->table('LVT') + $gcb->table('V');
+        $lv_lvt_v->add_comment('For use in \X; matches: HST=LV | HST=LVT | HST=V');
     }
 
     # Create a new property specially located that is a combination of the
@@ -10819,7 +10937,7 @@ sub compile_perl() {
     my $perl_charname = Property->new('Perl_Charnames',
                                 Core_Access => '\N{...} and charnames.pm',
                                 Default_Map => "",
-                                Directory => '.',
+                                Directory => File::Spec->curdir(),
                                 File => 'Name',
                                 Internal_Only_Warning => 1,
                                 Perl_Extension => 1,
@@ -10906,7 +11024,7 @@ END
                             Type => $ENUM,
                             Initialize => $ccc,
                             File => 'CombiningClass',
-                            Directory => '.',
+                            Directory => File::Spec->curdir(),
                             );
     $perl_ccc->set_to_output_map(1);
     $perl_ccc->add_comment(join_lines(<<END
@@ -11832,24 +11950,37 @@ sub make_table_pod_entries($) {
 sub pod_alphanumeric_sort {
     # Sort pod entries alphanumerically.
 
-    # The first few character columns are filler; and get rid of all the
-    # trailing stuff, starting with the trailing '}', so as to sort on just
-    # '\p{Name=Value'
-    my $a = lc substr($a, $FILLER);
+    # The first few character columns are filler, plus the '\p{'; and get rid
+    # of all the trailing stuff, starting with the trailing '}', so as to sort
+    # on just 'Name=Value'
+    (my $a = lc $a) =~ s/^ .*? { //x;
     $a =~ s/}.*//;
-    my $b = lc substr($b, $FILLER);
+    (my $b = lc $b) =~ s/^ .*? { //x;
     $b =~ s/}.*//;
 
+    # Determine if the two operands are both internal only or both not.
+    # Character 0 should be a '\'; 1 should be a p; 2 should be '{', so 3
+    # should be the underscore that begins internal only
+    my $a_is_internal = (substr($a, 0, 1) eq '_');
+    my $b_is_internal = (substr($b, 0, 1) eq '_');
+
+    # Sort so the internals come last in the table instead of first (which the
+    # leading underscore would otherwise indicate).
+    if ($a_is_internal != $b_is_internal) {
+        return 1 if $a_is_internal;
+        return -1
+    }
+
     # Determine if the two operands are numeric property values or not.
-    # A numeric property will look like \p{xyz: 3}.  But the number
+    # A numeric property will look like xyz: 3.  But the number
     # can begin with an optional minus sign, and may have a
-    # fraction or rational component, like \p{xyz: 3/2}.  If either
+    # fraction or rational component, like xyz: 3/2.  If either
     # isn't numeric, use alphabetic sort.
     my ($a_initial, $a_number) =
-        ($a =~ /^\\p{ ( [^:=]+ [:=] \s* ) (-? \d+ (?: [.\/] \d+)? )/ix);
+        ($a =~ /^ ( [^:=]+ [:=] \s* ) (-? \d+ (?: [.\/] \d+)? )/ix);
     return $a cmp $b unless defined $a_number;
     my ($b_initial, $b_number) =
-        ($b =~ /^\\p{ ( [^:=]+ [:=] \s* ) (-? \d+ (?: [.\/] \d+)? )/ix);
+        ($b =~ /^ ( [^:=]+ [:=] \s* ) (-? \d+ (?: [.\/] \d+)? )/ix);
     return $a cmp $b unless defined $b_number;
 
     # Here they are both numeric, but use alphabetic sort if the
@@ -12630,7 +12761,7 @@ sub write_all_tables() {
 
                     # Add an entry in the pod file for the table; it also does
                     # the children.
-                    make_table_pod_entries($table);
+                    make_table_pod_entries($table) if defined $pod_directory;
 
                     # See if the the table matches identical code points with
                     # something that has already been output.  In that case,
@@ -12698,11 +12829,14 @@ sub write_all_tables() {
                                                 = $standard_property_name;
                         }
 
-                        # Now for the pod entry for this alias.  Skip
-                        # the first one, which is the full name so won't have
-                        # an entry like: '\p{full: *}   \p{full: *}', and skip
-                        # if don't want an entry for this one.
-                        next if $i == 0 || ! $alias->make_pod_entry;
+                        # Now for the pod entry for this alias.  Skip if not
+                        # outputting a pod; skip the first one, which is the
+                        # full name so won't have an entry like: '\p{full: *}
+                        # \p{full: *}', and skip if don't want an entry for
+                        # this one.
+                        next if $i == 0
+                                || ! defined $pod_directory
+                                || ! $alias->make_pod_entry;
 
                         push @match_properties,
                             format_pod_line($indent_info_column,
@@ -13038,6 +13172,8 @@ sub make_property_test_script() {
     # tests are added, it will perturb all later ones in the .t file
     srand 0;
 
+    $t_path = 'TestProp.pl' unless defined $t_path; # the traditional name
+
     force_unlink ($t_path);
     push @files_actually_output, $t_path;
     my $OUT;
@@ -13212,6 +13348,11 @@ sub make_property_test_script() {
             }
         }
     }
+
+    foreach my $test (@backslash_X_tests) {
+        print $OUT "Test_X('$test');\n";
+    }
+
     print $OUT "Finished();\n";
     close $OUT;
     return;
@@ -13238,7 +13379,7 @@ my @input_file_objects = (
                     Handler => \&process_PropertyAliases,
                     ),
     Input_file->new(undef, v0,  # No file associated with this
-                    Progress_Message => 'Finishing Property Setup',
+                    Progress_Message => 'Finishing property setup',
                     Handler => \&finish_property_setup,
                     ),
     Input_file->new('PropValueAliases.txt', v0,
@@ -13361,6 +13502,9 @@ my @input_file_objects = (
     Input_file->new('BidiMirroring.txt', v3.0.1,
                     Property => 'Bidi_Mirroring_Glyph',
                     ),
+    Input_file->new("NormalizationTest.txt", v3.0.1,
+                    Skip => 1,
+                    ),
     Input_file->new('CaseFolding.txt', v3.0.1,
                     Pre_Handler => \&setup_case_folding,
                     Each_Line_Handler =>
@@ -13398,6 +13542,18 @@ my @input_file_objects = (
                     Property => 'Grapheme_Cluster_Break',
                     Has_Missings_Defaults => $NOT_IGNORED,
                     ),
+    Input_file->new("$AUXILIARY/GCBTest.txt", v4.1.0,
+                    Handler => \&process_GCB_test,
+                    ),
+    Input_file->new("$AUXILIARY/LBTest.txt", v4.1.0,
+                    Skip => 1,
+                    ),
+    Input_file->new("$AUXILIARY/SBTest.txt", v4.1.0,
+                    Skip => 1,
+                    ),
+    Input_file->new("$AUXILIARY/WBTest.txt", v4.1.0,
+                    Skip => 1,
+                    ),
     Input_file->new("$AUXILIARY/SentenceBreakProperty.txt", v4.1.0,
                     Property => 'Sentence_Break',
                     Has_Missings_Defaults => $NOT_IGNORED,
@@ -13408,6 +13564,9 @@ my @input_file_objects = (
     Input_file->new('NameAliases.txt', v5.0.0,
                     Property => 'Name_Alias',
                     ),
+    Input_file->new("BidiTest.txt", v5.2.0,
+                    Skip => 1,
+                    ),
     Input_file->new('UnihanIndicesDictionary.txt', v5.2.0,
                     Optional => 1,
                     Each_Line_Handler => \&filter_unihan_line,
@@ -13455,18 +13614,16 @@ END
 
 # Put into %potential_files a list of all the files in the directory structure
 # that could be inputs to this program, excluding those that we should ignore.
-# Also don't consider test files.  Use absolute file names because it makes it
-# easier across machine types.
+# Use absolute file names because it makes it easier across machine types.
 my @ignored_files_full_names = map { File::Spec->rel2abs(
                                      internal_file_to_platform($_))
                                 } keys %ignored_files;
 File::Find::find({
     wanted=>sub {
-        return unless /\.txt$/i;
-        return if /Test\.txt$/i;
-        my $full = File::Spec->rel2abs($_);
+        return unless /\.txt$/i;  # Some platforms change the name's case
+        my $full = lc(File::Spec->rel2abs($_));
         $potential_files{$full} = 1
-                        if ! grep { $full eq $_ } @ignored_files_full_names;
+                    if ! grep { $full eq lc($_) } @ignored_files_full_names;
         return;
     }
 }, File::Spec->curdir());
@@ -13479,8 +13636,8 @@ if ($write_unchanged_files) {
 else {
     print "Reading file list '$file_list'\n" if $verbosity >= $VERBOSE;
     my $file_handle;
-    if (! open $file_handle,"<",$file_list) {
-        Carp::my_carp("Failed to open '$file_list', turning on -globlist option instead: $!");
+    if (! open $file_handle, "<", $file_list) {
+        Carp::my_carp("Failed to open '$file_list' (this is expected to be missing the first time); turning on -globlist option instead: $!");
         $glob_list = 1;
     }
     else {
@@ -13510,9 +13667,9 @@ else {
 
             # The paths are stored with relative names, and with '/' as the
             # delimiter; convert to absolute on this machine
-            my $full = File::Spec->rel2abs(internal_file_to_platform($input));
+            my $full = lc(File::Spec->rel2abs(internal_file_to_platform($input)));
             $potential_files{$full} = 1
-                        if ! grep { $full eq $_ } @ignored_files_full_names;
+                        if ! grep { lc($full) eq lc($_) } @ignored_files_full_names;
         }
     }
 
@@ -13524,12 +13681,16 @@ if ($glob_list) {
     # Here wants to process all .txt files in the directory structure.
     # Convert them to full path names.  They are stored in the platform's
     # relative style
-    my @known_files = map { File::Spec->rel2abs($_->file) }
-                                                        @input_file_objects;
+    my @known_files;
+    foreach my $object (@input_file_objects) {
+        my $file = $object->file;
+        next unless defined $file;
+        push @known_files, File::Spec->rel2abs($file);
+    }
 
     my @unknown_input_files;
     foreach my $file (keys %potential_files) {
-        next if grep { $file eq $_ } @known_files;
+        next if grep { lc($file) eq lc($_) } @known_files;
 
         # Here, the file is unknown to us.  Get relative path name
         $file = File::Spec->abs2rel($file);
@@ -13545,7 +13706,7 @@ if ($glob_list) {
         # If the file isn't extracted (meaning none of the directories is the
         # extracted one), just add it to the end of the list of inputs.
         if (! grep { $EXTRACTED_DIR eq $_ } @directories) {
-            push @input_file_objects, Input_file->new($file);
+            push @input_file_objects, Input_file->new($file, v0);
         }
         else {
 
@@ -13557,10 +13718,11 @@ if ($glob_list) {
             # $compare_versions is set.
             for (my $i = 0; $i < @input_file_objects; $i++) {
                 if ($input_file_objects[$i]->first_released ne v0
-                    && $input_file_objects[$i]->file ne 'DAge.txt'
-                    && $input_file_objects[$i]->file !~ /$EXTRACTED_DIR/)
+                    && lc($input_file_objects[$i]->file) ne 'dage.txt'
+                    && $input_file_objects[$i]->file !~ /$EXTRACTED_DIR/i)
                 {
-                    splice @input_file_objects, $i, 0, Input_file->new($file);
+                    splice @input_file_objects, $i, 0,
+                                                Input_file->new($file, v0);
                     last;
                 }
             }
@@ -13568,22 +13730,16 @@ if ($glob_list) {
         }
     }
     if (@unknown_input_files) {
-        print STDERR simple_fold(join_line(<<END
+        print STDERR simple_fold(join_lines(<<END
 
 The following files are unknown as to how to handle.  Assuming they are
 typical property files.  You'll know by later error messages if it worked or
 not:
 END
-        ) . join(", ", @unknown_input_files) . "\n\n");
+        ) . " " . join(", ", @unknown_input_files) . "\n\n");
     }
 } # End of looking through directory structure for more .txt files.
 
-if ( $make_list ) {
-    foreach my $file (@mktables_list_output_files) {
-        unlink internal_file_to_platform($file);
-    }
-}
-
 # Create the list of input files from the objects we have defined, plus
 # version
 my @input_files = 'version';
@@ -13740,28 +13896,53 @@ __DATA__
 use strict;
 use warnings;
 
-# Test the \p{} regular expression constructs.  This file is constructed by
-# mktables from the tables it generates, so if mktables is buggy, this won't
-# necessarily catch those bugs.  Tests are generated for all feasible
-# properties; a few aren't currently feasible; see is_code_point_usable()
-# in mktables for details.
+# Test qr/\X/ and the \p{} regular expression constructs.  This file is
+# constructed by mktables from the tables it generates, so if mktables is
+# buggy, this won't necessarily catch those bugs.  Tests are generated for all
+# feasible properties; a few aren't currently feasible; see
+# is_code_point_usable() in mktables for details.
 
 # Standard test packages are not used because this manipulates SIG_WARN.  It
 # exits 0 if every non-skipped test succeeded; -1 if any failed.
 
 my $Tests = 0;
 my $Fails = 0;
-my $Skips = 0;
 
-my $non_ASCII = (ord('A') == 65);
+my $non_ASCII = (ord('A') != 65);
+
+# The 256 8-bit characters in ASCII ordinal order, with the ones that don't
+# have Perl names replaced by -1
+my @ascii_ordered_chars = (
+    "\0",
+    (-1) x 6,
+    "\a", "\b", "\t", "\n",
+    -1,   # No Vt 
+    "\f", "\r",
+    (-1) x 18,
+    " ", "!", "\"", "#", '$', "%", "&", "'",
+    "(", ")", "*", "+", ",", "-", ".", "/",
+    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
+    ":", ";", "<", "=", ">", "?", "@",
+    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
+    "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
+    "[", "\\", "]", "^", "_", "`",
+    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
+    "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
+    "{", "|", "}", "~",
+    (-1) x 129
+);
 
-# The first 127 ASCII characters in ordinal order, with the ones that don't
-# have Perl names (as of 5.8) replaced by dots.  The 127th is used as the
-# string delimiter
-my $ascii_to_ebcdic = "\0......\a\b\t\n.\f\r.................. !\"#\$\%&'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
-#for my $i (0..126) {
-#    print $i, ": ", substr($ascii_to_ebcdic, $i, 1), "\n";
-#}
+sub ASCII_ord_to_native ($) {
+    # Converts input ordinal number to the native one, if can be done easily.
+    # Returns -1 otherwise.
+
+    my $ord = shift;
+
+    return $ord if $ord > 255 || ! $non_ASCII;
+    my $result = $ascii_ordered_chars[$ord];
+    return $result if $result eq '-1';
+    return ord($result);
+}
 
 sub Expect($$$$) {
     my $expected = shift;
@@ -13771,38 +13952,24 @@ sub Expect($$$$) {
                                 # or empty if none
     my $line   = (caller)[2];
 
-    # Convert the code point to hex form
-    my $string = sprintf "\"\\x{%04X}\"", $ord; 
-
-    # Convert the non-ASCII code points expressible as characters in Perl 5.8
-    # to their ASCII equivalents, and skip the others.
-    if ($non_ASCII && $ord < 255) {
-
-        # Dots are used as place holders in the conversion string for the
-        # non-convertible ones, so check for it first.
-        if ($ord == 0x2E) {
-            $ord = ord('.');
-        }
-        elsif ($ord < 0x7F
-                  # Any dots returned are non-convertible.
-                 && ((my $char = substr($ascii_to_ebcdic, $ord, 1)) ne '.'))
-        {
-            #print STDERR "$ord, $char, \n";
-            $ord = ord($char);
-        }
-        else {
-            $Tests++;
-            $Skips++;
-            print "ok $Tests - $string =~ $regex # Skipped: non-ASCII\n";
-            return;
-        }
+    # Convert the non-ASCII code points expressible as characters to their
+    # ASCII equivalents, and skip the others.
+    $ord = ASCII_ord_to_native($ord);
+    if ($ord < 0) {
+        $Tests++;
+        print "ok $Tests - "
+              . sprintf("\"\\x{%04X}\"", $ord)
+              . " =~ $regex # Skipped: non-ASCII\n";
+        return;
     }
 
-    # The first time through, use all warnings.
+    # Convert the code point to hex form
+    my $string = sprintf "\"\\x{%04X}\"", $ord;
+
     my @tests = "";
 
-    # If the input should generate a warning, add another time through with
-    # them turned off
+    # The first time through, use all warnings.  If the input should generate
+    # a warning, add another time through with them turned off
     push @tests, "no warnings '$warning_type';" if $warning_type;
 
     foreach my $no_warnings (@tests) {
@@ -13862,9 +14029,142 @@ sub Error($) {
     return;
 }
 
+# GCBTest.txt character that separates grapheme clusters
+my $breakable_utf8 = my $breakable = chr(0xF7);
+utf8::upgrade($breakable_utf8);
+
+# GCBTest.txt character that indicates that the adjoining code points are part
+# of the same grapheme cluster
+my $nobreak_utf8 = my $nobreak = chr(0xD7);
+utf8::upgrade($nobreak_utf8);
+
+sub Test_X($) {
+    # Test qr/\X/ matches.  The input is a line from auxiliary/GCBTest.txt
+    # Each such line is a sequence of code points given by their hex numbers,
+    # separated by the two characters defined just before this subroutine that
+    # indicate that either there can or cannot be a break between the adjacent
+    # code points.  If there isn't a break, that means the sequence forms an
+    # extended grapheme cluster, which means that \X should match the whole
+    # thing.  If there is a break, \X should stop there.  This is all
+    # converted by this routine into a match:
+    #   $string =~ /(\X)/,
+    # Each \X should match the next cluster; and that is what is checked.
+
+    my $template = shift;
+
+    my $line   = (caller)[2];
+
+    # The line contains characters above the ASCII range, but in Latin1.  It
+    # may or may not be in utf8, and if it is, it may or may not know it.  So,
+    # convert these characters to 8 bits.  If knows is in utf8, simply
+    # downgrade.
+    if (utf8::is_utf8($template)) {
+        utf8::downgrade($template);
+    } else {
+
+        # Otherwise, if it is in utf8, but doesn't know it, the next lines
+        # convert the two problematic characters to their 8-bit equivalents.
+        # If it isn't in utf8, they don't harm anything.
+        use bytes;
+        $template =~ s/$nobreak_utf8/$nobreak/g;
+        $template =~ s/$breakable_utf8/$breakable/g;
+    }
+
+    # Get rid of the leading and trailing breakables
+    $template =~ s/^ \s* $breakable \s* //x;
+    $template =~ s/ \s* $breakable \s* $ //x;
+
+    # And no-breaks become just a space.
+    $template =~ s/ \s* $nobreak \s* / /xg;
+
+    # Split the input into segments that are breakable between them.
+    my @segments = split /\s*$breakable\s*/, $template;
+
+    my $string = "";
+    my $display_string = "";
+    my @should_match;
+    my @should_display;
+
+    # Convert the code point sequence in each segment into a Perl string of
+    # characters
+    foreach my $segment (@segments) {
+        my @code_points = split /\s+/, $segment;
+        my $this_string = "";
+        my $this_display = "";
+        foreach my $code_point (@code_points) {
+            my $ord = ASCII_ord_to_native(hex $code_point);
+            if ($ord < 0) {
+                $Tests++;
+                print "ok $Tests - String containing $code_point =~ /(\\X)/g # Skipped: non-ASCII\n";
+                return;
+            }
+            $this_string .= chr $ord;
+            $this_display .= "\\x{$code_point}";
+        }
+
+        # The next cluster should match the string in this segment.
+        push @should_match, $this_string;
+        push @should_display, $this_display;
+        $string .= $this_string;
+        $display_string .= $this_display;
+    }
+
+    # If a string can be represented in both non-ut8 and utf8, test both cases
+    UPGRADE:
+    for my $to_upgrade (0 .. 1) {
+        
+        if ($to_upgrade) {
+
+            # If already in utf8, would just be a repeat
+            next UPGRADE if utf8::is_utf8($string);
+
+            utf8::upgrade($string);
+        }
+
+        # Finally, do the \X match.
+        my @matches = $string =~ /(\X)/g;
+
+        # Look through each matched cluster to verify that it matches what we
+        # expect.
+        my $min = (@matches < @should_match) ? @matches : @should_match;
+        for my $i (0 .. $min - 1) {
+            $Tests++;
+            if ($matches[$i] eq $should_match[$i]) {
+                print "ok $Tests - ";
+                if ($i == 0) {
+                    print "In \"$display_string\" =~ /(\\X)/g, \\X #1";
+                } else {
+                    print "And \\X #", $i + 1,
+                }
+                print " correctly matched $should_display[$i]; line $line\n";
+            } else {
+                $matches[$i] = join("", map { sprintf "\\x{%04X}", $_ }
+                                                    unpack("U*", $matches[$i]));
+                print "not ok $Tests - In \"$display_string\" =~ /(\\X)/g, \\X #",
+                    $i + 1,
+                    " should have matched $should_display[$i]",
+                    " but instead matched $matches[$i]",
+                    ".  Abandoning rest of line $line\n";
+                next UPGRADE;
+            }
+        }
+
+        # And the number of matches should equal the number of expected matches.
+        $Tests++;
+        if (@matches == @should_match) {
+            print "ok $Tests - Nothing was left over; line $line\n";
+        } else {
+            print "not ok $Tests - There were ", scalar @should_match, " \\X matches expected, but got ", scalar @matches, " instead; line $line\n";
+        }
+    }
+
+    return;
+}
+
 sub Finished() {
-    print "1..$Tests.\n";
+    print "1..$Tests\n";
     exit($Fails ? -1 : 0);
 }
 
 Error('\p{Script=InGreek}');    # Bug #69018
+Test_X("1100 $nobreak 1161");  # Bug #70940