From: Uri Guttman
Date: Fri, 31 Oct 2008 01:36:20 +0000 (-0500)
Subject: initial commit
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fmaster;p=urisagit%2FSort-Maker.git
initial commit
---
7468c58403ae8ecc1b2bdefba904be9e8198cfef
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d1f8c03
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+;; This buffer is for notes you don't want to save, and for Lisp evaluation.
+;; If you want to create a file, visit that file with C-x C-f,
+;; then enter the text in that file's own buffer.
+
+CVS
+*.gz
+blib
+*.tar
+old
+*~
+
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..2f2880e
--- /dev/null
+++ b/Changes
@@ -0,0 +1,58 @@
+Revision history for Perl extension Sort::Maker.
+
+0.06 Fri Dec 29 00:47:00 EST 2006
+ - renamed t/common.pl to t/common.pm
+ - added support for error tests in t/common.pm
+ - rewrote t/error.t to use test table and common.pm
+ - created proper README file
+ - fixed doc bug (removed 'style =>')
+ -from Mumia W..
+ - added t/closure.t
+ -initial code from Mumia W..
+
+ - added support for closures for key extraction
+ -idea and initial code and pod from Mumia W..
+
+
+0.05 Thu Apr 6 03:15:09 EDT 2006
+ - fixed parsing array ref key description
+ - added 2 tests to t/error.t
+ -from John Kent
+ - reordered Changes to be newer to older
+
+0.04 Wed Apr 27 01:37:00 EDT 2005
+ - fixed many doc typos and mistakes
+ -from Brad Baxter
+ - fixed 'name' option bug, added test and docs
+ -from Damian Conway
+ - added support for code refs as extraction code
+ cleaned up the extraction code documentation
+ -from Damian Conway
+
+0.03 Tue Feb 8 03:45:32 EST 2005
+ - fixed bug when using GRT and varying strings and ref_in.
+ new test - ref_in_varying.t
+ -from Brad Baxter
+ - fixed bug when using GRT and descending string sort with
+ numeric data.
+ new test - descending_grt_string.t
+ -from Brad Baxter
+
+0.02 Fri Aug 27 00:43:25 EDT 2004
+ - fixed bug in decending integer/float GRT sorts (tests included)
+ -from Marc Prewitt
+ - cleaned up pod regarding 'fixed' and other attributes
+ -from Marc Prewitt
+ - fixed and cleaned up docs for init_code feature. now it is
+ documented to work only with ST and GRT
+ - the string_data attribute for GRT now works and there is the
+ string_data.t test for it.
+ - fixed bug with string_data mode for GRT with descending string sorts
+ added tests for this
+ -from Marc Prewitt
+ - made all appropriate tests support the -bench option
+
+0.01 Wed Mar 24 23:47:32 2004
+ - original version; created by h2xs 1.21 with options
+ -AX -n Sort::Maker
+
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..e9a0eda
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,24 @@
+Changes
+Makefile.PL
+Sort/Maker.pm
+MANIFEST
+README
+t/errors.t
+t/simple.t
+t/arrays.t
+t/hashes.t
+t/io.t
+t/ref_in_varying.t
+t/descending_grt_string.t
+t/GRT.t
+t/numbers.t
+t/regex.t
+t/init_code.t
+t/string_data.t
+t/name.t
+t/code.t
+t/closure.t
+t/bad_code.t
+t/common.pm
+META.yml Module meta-data (added by MakeMaker)
+slides/sort_maker.txt
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..fc7bd3d
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,10 @@
+# http://module-build.sourceforge.net/META-spec.html
+#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
+name: Sort-Maker
+version: 0.06
+version_from: Sort/Maker.pm
+installdirs: site
+requires:
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.17
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..a482a9d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,728 @@
+# This Makefile is for the Sort extension to perl.
+#
+# It was generated automatically by MakeMaker version
+# 6.17 (Revision: 1.133) from the contents of
+# Makefile.PL. Don't edit this file, edit Makefile.PL instead.
+#
+# ANY CHANGES MADE HERE WILL BE LOST!
+#
+# MakeMaker ARGV: ()
+#
+# MakeMaker Parameters:
+
+# ABSTRACT_FROM => q[Sort/Maker.pm]
+# AUTHOR => q[Uri Guttman ]
+# DISTNAME => q[Sort-Maker]
+# NAME => q[Sort]
+# PREREQ_PM => { }
+# VERSION_FROM => q[Sort/Maker.pm]
+
+# --- MakeMaker post_initialize section:
+
+
+# --- MakeMaker const_config section:
+
+# These definitions are from config.sh (via /usr/local/lib/perl5/5.8.6/sun4-solaris/Config.pm)
+
+# They may have been overridden via Makefile.PL or on the command line
+AR = ar
+CC = gcc
+CCCDLFLAGS = -fPIC
+CCDLFLAGS =
+DLEXT = so
+DLSRC = dl_dlopen.xs
+LD = gcc
+LDDLFLAGS = -G -L/usr/local/lib
+LDFLAGS = -L/usr/local/lib
+LIBC = /lib/libc.so
+LIB_EXT = .a
+OBJ_EXT = .o
+OSNAME = solaris
+OSVERS = 2.9
+RANLIB = :
+SITELIBEXP = /usr/local/lib/perl5/site_perl/5.8.6
+SITEARCHEXP = /usr/local/lib/perl5/site_perl/5.8.6/sun4-solaris
+SO = so
+EXE_EXT =
+FULL_AR = /usr/ccs/bin/ar
+VENDORARCHEXP =
+VENDORLIBEXP =
+
+
+# --- MakeMaker constants section:
+AR_STATIC_ARGS = cr
+DIRFILESEP = /
+NAME = Sort
+NAME_SYM = Sort
+VERSION = 0.06
+VERSION_MACRO = VERSION
+VERSION_SYM = 0_06
+DEFINE_VERSION = -D$(VERSION_MACRO)=\"$(VERSION)\"
+XS_VERSION = 0.06
+XS_VERSION_MACRO = XS_VERSION
+XS_DEFINE_VERSION = -D$(XS_VERSION_MACRO)=\"$(XS_VERSION)\"
+INST_ARCHLIB = blib/arch
+INST_SCRIPT = blib/script
+INST_BIN = blib/bin
+INST_LIB = blib/lib
+INST_MAN1DIR = blib/man1
+INST_MAN3DIR = blib/man3
+MAN1EXT = 1
+MAN3EXT = 3
+INSTALLDIRS = site
+DESTDIR =
+PREFIX =
+PERLPREFIX = /usr/local
+SITEPREFIX = /usr/local
+VENDORPREFIX =
+INSTALLPRIVLIB = $(PERLPREFIX)/lib/perl5/5.8.6
+DESTINSTALLPRIVLIB = $(DESTDIR)$(INSTALLPRIVLIB)
+INSTALLSITELIB = $(SITEPREFIX)/lib/perl5/site_perl/5.8.6
+DESTINSTALLSITELIB = $(DESTDIR)$(INSTALLSITELIB)
+INSTALLVENDORLIB =
+DESTINSTALLVENDORLIB = $(DESTDIR)$(INSTALLVENDORLIB)
+INSTALLARCHLIB = $(PERLPREFIX)/lib/perl5/5.8.6/sun4-solaris
+DESTINSTALLARCHLIB = $(DESTDIR)$(INSTALLARCHLIB)
+INSTALLSITEARCH = $(SITEPREFIX)/lib/perl5/site_perl/5.8.6/sun4-solaris
+DESTINSTALLSITEARCH = $(DESTDIR)$(INSTALLSITEARCH)
+INSTALLVENDORARCH =
+DESTINSTALLVENDORARCH = $(DESTDIR)$(INSTALLVENDORARCH)
+INSTALLBIN = $(PERLPREFIX)/bin
+DESTINSTALLBIN = $(DESTDIR)$(INSTALLBIN)
+INSTALLSITEBIN = $(SITEPREFIX)/bin
+DESTINSTALLSITEBIN = $(DESTDIR)$(INSTALLSITEBIN)
+INSTALLVENDORBIN =
+DESTINSTALLVENDORBIN = $(DESTDIR)$(INSTALLVENDORBIN)
+INSTALLSCRIPT = $(PERLPREFIX)/bin
+DESTINSTALLSCRIPT = $(DESTDIR)$(INSTALLSCRIPT)
+INSTALLMAN1DIR = $(PERLPREFIX)/man/man1
+DESTINSTALLMAN1DIR = $(DESTDIR)$(INSTALLMAN1DIR)
+INSTALLSITEMAN1DIR = $(SITEPREFIX)/man/man1
+DESTINSTALLSITEMAN1DIR = $(DESTDIR)$(INSTALLSITEMAN1DIR)
+INSTALLVENDORMAN1DIR =
+DESTINSTALLVENDORMAN1DIR = $(DESTDIR)$(INSTALLVENDORMAN1DIR)
+INSTALLMAN3DIR = $(PERLPREFIX)/man/man3
+DESTINSTALLMAN3DIR = $(DESTDIR)$(INSTALLMAN3DIR)
+INSTALLSITEMAN3DIR = $(SITEPREFIX)/man/man3
+DESTINSTALLSITEMAN3DIR = $(DESTDIR)$(INSTALLSITEMAN3DIR)
+INSTALLVENDORMAN3DIR =
+DESTINSTALLVENDORMAN3DIR = $(DESTDIR)$(INSTALLVENDORMAN3DIR)
+PERL_LIB = /usr/local/lib/perl5/5.8.6
+PERL_ARCHLIB = /usr/local/lib/perl5/5.8.6/sun4-solaris
+LIBPERL_A = libperl.a
+FIRST_MAKEFILE = Makefile
+MAKEFILE_OLD = $(FIRST_MAKEFILE).old
+MAKE_APERL_FILE = $(FIRST_MAKEFILE).aperl
+PERLMAINCC = $(CC)
+PERL_INC = /usr/local/lib/perl5/5.8.6/sun4-solaris/CORE
+PERL = /usr/local/bin/perl
+FULLPERL = /usr/local/bin/perl
+ABSPERL = $(PERL)
+PERLRUN = $(PERL)
+FULLPERLRUN = $(FULLPERL)
+ABSPERLRUN = $(ABSPERL)
+PERLRUNINST = $(PERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)"
+FULLPERLRUNINST = $(FULLPERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)"
+ABSPERLRUNINST = $(ABSPERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)"
+PERL_CORE = 0
+PERM_RW = 644
+PERM_RWX = 755
+
+MAKEMAKER = /usr/local/lib/perl5/5.8.6/ExtUtils/MakeMaker.pm
+MM_VERSION = 6.17
+MM_REVISION = 1.133
+
+# FULLEXT = Pathname for extension directory (eg Foo/Bar/Oracle).
+# BASEEXT = Basename part of FULLEXT. May be just equal FULLEXT. (eg Oracle)
+# PARENT_NAME = NAME without BASEEXT and no trailing :: (eg Foo::Bar)
+# DLBASE = Basename part of dynamic library. May be just equal BASEEXT.
+FULLEXT = Sort
+BASEEXT = Sort
+PARENT_NAME =
+DLBASE = $(BASEEXT)
+VERSION_FROM = Sort/Maker.pm
+OBJECT =
+LDFROM = $(OBJECT)
+LINKTYPE = dynamic
+
+# Handy lists of source code files:
+XS_FILES =
+C_FILES =
+O_FILES =
+H_FILES =
+MAN1PODS =
+MAN3PODS = Sort/Maker.pm
+
+# Where is the Config information that we are using/depend on
+CONFIGDEP = $(PERL_ARCHLIB)$(DIRFILESEP)Config.pm $(PERL_INC)$(DIRFILESEP)config.h
+
+# Where to build things
+INST_LIBDIR = $(INST_LIB)
+INST_ARCHLIBDIR = $(INST_ARCHLIB)
+
+INST_AUTODIR = $(INST_LIB)/auto/$(FULLEXT)
+INST_ARCHAUTODIR = $(INST_ARCHLIB)/auto/$(FULLEXT)
+
+INST_STATIC =
+INST_DYNAMIC =
+INST_BOOT =
+
+# Extra linker info
+EXPORT_LIST =
+PERL_ARCHIVE =
+PERL_ARCHIVE_AFTER =
+
+
+TO_INST_PM = Sort/Maker.pm
+
+PM_TO_BLIB = Sort/Maker.pm \
+ $(INST_LIB)/Sort/Maker.pm
+
+
+# --- MakeMaker platform_constants section:
+MM_Unix_VERSION = 1.42
+PERL_MALLOC_DEF = -DPERL_EXTMALLOC_DEF -Dmalloc=Perl_malloc -Dfree=Perl_mfree -Drealloc=Perl_realloc -Dcalloc=Perl_calloc
+
+
+# --- MakeMaker tool_autosplit section:
+# Usage: $(AUTOSPLITFILE) FileToSplit AutoDirToSplitInto
+AUTOSPLITFILE = $(PERLRUN) -e 'use AutoSplit; autosplit($$ARGV[0], $$ARGV[1], 0, 1, 1)'
+
+
+
+# --- MakeMaker tool_xsubpp section:
+
+
+# --- MakeMaker tools_other section:
+SHELL = /bin/sh
+CHMOD = chmod
+CP = cp
+MV = mv
+NOOP = $(SHELL) -c true
+NOECHO = @
+RM_F = rm -f
+RM_RF = rm -rf
+TEST_F = test -f
+TOUCH = touch
+UMASK_NULL = umask 0
+DEV_NULL = > /dev/null 2>&1
+MKPATH = $(PERLRUN) "-MExtUtils::Command" -e mkpath
+EQUALIZE_TIMESTAMP = $(PERLRUN) "-MExtUtils::Command" -e eqtime
+ECHO = echo
+ECHO_N = echo -n
+UNINST = 0
+VERBINST = 0
+MOD_INSTALL = $(PERLRUN) -MExtUtils::Install -e 'install({@ARGV}, '\''$(VERBINST)'\'', 0, '\''$(UNINST)'\'');'
+DOC_INSTALL = $(PERLRUN) "-MExtUtils::Command::MM" -e perllocal_install
+UNINSTALL = $(PERLRUN) "-MExtUtils::Command::MM" -e uninstall
+WARN_IF_OLD_PACKLIST = $(PERLRUN) "-MExtUtils::Command::MM" -e warn_if_old_packlist
+
+
+# --- MakeMaker makemakerdflt section:
+makemakerdflt: all
+ $(NOECHO) $(NOOP)
+
+
+# --- MakeMaker dist section:
+TAR = tar
+TARFLAGS = cvf
+ZIP = zip
+ZIPFLAGS = -r
+COMPRESS = gzip --best
+SUFFIX = .gz
+SHAR = shar
+PREOP = $(NOECHO) $(NOOP)
+POSTOP = $(NOECHO) $(NOOP)
+TO_UNIX = $(NOECHO) $(NOOP)
+CI = ci -u
+RCS_LABEL = rcs -Nv$(VERSION_SYM): -q
+DIST_CP = best
+DIST_DEFAULT = tardist
+DISTNAME = Sort-Maker
+DISTVNAME = Sort-Maker-0.06
+
+
+# --- MakeMaker macro section:
+
+
+# --- MakeMaker depend section:
+
+
+# --- MakeMaker cflags section:
+
+
+# --- MakeMaker const_loadlibs section:
+
+
+# --- MakeMaker const_cccmd section:
+
+
+# --- MakeMaker post_constants section:
+
+
+# --- MakeMaker pasthru section:
+
+PASTHRU = LIB="$(LIB)"\
+ LIBPERL_A="$(LIBPERL_A)"\
+ LINKTYPE="$(LINKTYPE)"\
+ PREFIX="$(PREFIX)"\
+ OPTIMIZE="$(OPTIMIZE)"\
+ PASTHRU_DEFINE="$(PASTHRU_DEFINE)"\
+ PASTHRU_INC="$(PASTHRU_INC)"
+
+
+# --- MakeMaker special_targets section:
+.SUFFIXES: .xs .c .C .cpp .i .s .cxx .cc $(OBJ_EXT)
+
+.PHONY: all config static dynamic test linkext manifest
+
+
+
+# --- MakeMaker c_o section:
+
+
+# --- MakeMaker xs_c section:
+
+
+# --- MakeMaker xs_o section:
+
+
+# --- MakeMaker top_targets section:
+all :: pure_all manifypods
+ $(NOECHO) $(NOOP)
+
+
+pure_all :: config pm_to_blib subdirs linkext
+ $(NOECHO) $(NOOP)
+
+subdirs :: $(MYEXTLIB)
+ $(NOECHO) $(NOOP)
+
+config :: $(FIRST_MAKEFILE) $(INST_LIBDIR)$(DIRFILESEP).exists
+ $(NOECHO) $(NOOP)
+
+config :: $(INST_ARCHAUTODIR)$(DIRFILESEP).exists
+ $(NOECHO) $(NOOP)
+
+config :: $(INST_AUTODIR)$(DIRFILESEP).exists
+ $(NOECHO) $(NOOP)
+
+$(INST_AUTODIR)/.exists :: /usr/local/lib/perl5/5.8.6/sun4-solaris/CORE/perl.h
+ $(NOECHO) $(MKPATH) $(INST_AUTODIR)
+ $(NOECHO) $(EQUALIZE_TIMESTAMP) /usr/local/lib/perl5/5.8.6/sun4-solaris/CORE/perl.h $(INST_AUTODIR)/.exists
+
+ -$(NOECHO) $(CHMOD) $(PERM_RWX) $(INST_AUTODIR)
+
+$(INST_LIBDIR)/.exists :: /usr/local/lib/perl5/5.8.6/sun4-solaris/CORE/perl.h
+ $(NOECHO) $(MKPATH) $(INST_LIBDIR)
+ $(NOECHO) $(EQUALIZE_TIMESTAMP) /usr/local/lib/perl5/5.8.6/sun4-solaris/CORE/perl.h $(INST_LIBDIR)/.exists
+
+ -$(NOECHO) $(CHMOD) $(PERM_RWX) $(INST_LIBDIR)
+
+$(INST_ARCHAUTODIR)/.exists :: /usr/local/lib/perl5/5.8.6/sun4-solaris/CORE/perl.h
+ $(NOECHO) $(MKPATH) $(INST_ARCHAUTODIR)
+ $(NOECHO) $(EQUALIZE_TIMESTAMP) /usr/local/lib/perl5/5.8.6/sun4-solaris/CORE/perl.h $(INST_ARCHAUTODIR)/.exists
+
+ -$(NOECHO) $(CHMOD) $(PERM_RWX) $(INST_ARCHAUTODIR)
+
+config :: $(INST_MAN3DIR)$(DIRFILESEP).exists
+ $(NOECHO) $(NOOP)
+
+
+$(INST_MAN3DIR)/.exists :: /usr/local/lib/perl5/5.8.6/sun4-solaris/CORE/perl.h
+ $(NOECHO) $(MKPATH) $(INST_MAN3DIR)
+ $(NOECHO) $(EQUALIZE_TIMESTAMP) /usr/local/lib/perl5/5.8.6/sun4-solaris/CORE/perl.h $(INST_MAN3DIR)/.exists
+
+ -$(NOECHO) $(CHMOD) $(PERM_RWX) $(INST_MAN3DIR)
+
+help:
+ perldoc ExtUtils::MakeMaker
+
+
+# --- MakeMaker linkext section:
+
+linkext :: $(LINKTYPE)
+ $(NOECHO) $(NOOP)
+
+
+# --- MakeMaker dlsyms section:
+
+
+# --- MakeMaker dynamic section:
+
+dynamic :: $(FIRST_MAKEFILE) $(INST_DYNAMIC) $(INST_BOOT)
+ $(NOECHO) $(NOOP)
+
+
+# --- MakeMaker dynamic_bs section:
+
+BOOTSTRAP =
+
+
+# --- MakeMaker dynamic_lib section:
+
+
+# --- MakeMaker static section:
+
+## $(INST_PM) has been moved to the all: target.
+## It remains here for awhile to allow for old usage: "make static"
+static :: $(FIRST_MAKEFILE) $(INST_STATIC)
+ $(NOECHO) $(NOOP)
+
+
+# --- MakeMaker static_lib section:
+
+
+# --- MakeMaker manifypods section:
+
+POD2MAN_EXE = $(PERLRUN) "-MExtUtils::Command::MM" -e pod2man "--"
+POD2MAN = $(POD2MAN_EXE)
+
+
+manifypods : pure_all \
+ Sort/Maker.pm \
+ Sort/Maker.pm
+ $(NOECHO) $(POD2MAN) --section=3 --perm_rw=$(PERM_RW)\
+ Sort/Maker.pm $(INST_MAN3DIR)/Sort::Maker.$(MAN3EXT)
+
+
+
+
+# --- MakeMaker processPL section:
+
+
+# --- MakeMaker installbin section:
+
+
+# --- MakeMaker subdirs section:
+
+# none
+
+# --- MakeMaker clean_subdirs section:
+clean_subdirs :
+ $(NOECHO) $(NOOP)
+
+
+# --- MakeMaker clean section:
+
+# Delete temporary files but do not touch installed files. We don't delete
+# the Makefile here so a later make realclean still has a makefile to use.
+
+clean :: clean_subdirs
+ -$(RM_RF) ./blib $(MAKE_APERL_FILE) $(INST_ARCHAUTODIR)/extralibs.all $(INST_ARCHAUTODIR)/extralibs.ld perlmain.c tmon.out mon.out so_locations pm_to_blib *$(OBJ_EXT) *$(LIB_EXT) perl.exe perl perl$(EXE_EXT) $(BOOTSTRAP) $(BASEEXT).bso $(BASEEXT).def lib$(BASEEXT).def $(BASEEXT).exp $(BASEEXT).x core core.*perl.*.? *perl.core core.[0-9] core.[0-9][0-9] core.[0-9][0-9][0-9] core.[0-9][0-9][0-9][0-9] core.[0-9][0-9][0-9][0-9][0-9]
+ -$(MV) $(FIRST_MAKEFILE) $(MAKEFILE_OLD) $(DEV_NULL)
+
+
+# --- MakeMaker realclean_subdirs section:
+realclean_subdirs :
+ $(NOECHO) $(NOOP)
+
+
+# --- MakeMaker realclean section:
+
+# Delete temporary files (via clean) and also delete installed files
+realclean purge :: clean realclean_subdirs
+ $(RM_RF) $(INST_AUTODIR) $(INST_ARCHAUTODIR)
+ $(RM_RF) $(DISTVNAME)
+ $(RM_F) $(INST_LIB)/Sort/Maker.pm $(MAKEFILE_OLD) $(FIRST_MAKEFILE)
+
+
+# --- MakeMaker metafile section:
+metafile :
+ $(NOECHO) $(ECHO) '# http://module-build.sourceforge.net/META-spec.html' > META.yml
+ $(NOECHO) $(ECHO) '#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#' >> META.yml
+ $(NOECHO) $(ECHO) 'name: Sort-Maker' >> META.yml
+ $(NOECHO) $(ECHO) 'version: 0.06' >> META.yml
+ $(NOECHO) $(ECHO) 'version_from: Sort/Maker.pm' >> META.yml
+ $(NOECHO) $(ECHO) 'installdirs: site' >> META.yml
+ $(NOECHO) $(ECHO) 'requires:' >> META.yml
+ $(NOECHO) $(ECHO) '' >> META.yml
+ $(NOECHO) $(ECHO) 'distribution_type: module' >> META.yml
+ $(NOECHO) $(ECHO) 'generated_by: ExtUtils::MakeMaker version 6.17' >> META.yml
+
+
+# --- MakeMaker metafile_addtomanifest section:
+metafile_addtomanifest:
+ $(NOECHO) $(PERLRUN) -MExtUtils::Manifest=maniadd -e 'eval { maniadd({q{META.yml} => q{Module meta-data (added by MakeMaker)}}) } ' \
+ -e ' or print "Could not add META.yml to MANIFEST: $${'\''@'\''}\n"'
+
+
+# --- MakeMaker dist_basics section:
+distclean :: realclean distcheck
+ $(NOECHO) $(NOOP)
+
+distcheck :
+ $(PERLRUN) "-MExtUtils::Manifest=fullcheck" -e fullcheck
+
+skipcheck :
+ $(PERLRUN) "-MExtUtils::Manifest=skipcheck" -e skipcheck
+
+manifest :
+ $(PERLRUN) "-MExtUtils::Manifest=mkmanifest" -e mkmanifest
+
+veryclean : realclean
+ $(RM_F) *~ *.orig */*~ */*.orig
+
+
+
+# --- MakeMaker dist_core section:
+
+dist : $(DIST_DEFAULT) $(FIRST_MAKEFILE)
+ $(NOECHO) $(PERLRUN) -l -e 'print '\''Warning: Makefile possibly out of date with $(VERSION_FROM)'\''' \
+ -e ' if -e '\''$(VERSION_FROM)'\'' and -M '\''$(VERSION_FROM)'\'' < -M '\''$(FIRST_MAKEFILE)'\'';'
+
+tardist : $(DISTVNAME).tar$(SUFFIX)
+ $(NOECHO) $(NOOP)
+
+uutardist : $(DISTVNAME).tar$(SUFFIX)
+ uuencode $(DISTVNAME).tar$(SUFFIX) $(DISTVNAME).tar$(SUFFIX) > $(DISTVNAME).tar$(SUFFIX)_uu
+
+$(DISTVNAME).tar$(SUFFIX) : distdir
+ $(PREOP)
+ $(TO_UNIX)
+ $(TAR) $(TARFLAGS) $(DISTVNAME).tar $(DISTVNAME)
+ $(RM_RF) $(DISTVNAME)
+ $(COMPRESS) $(DISTVNAME).tar
+ $(POSTOP)
+
+zipdist : $(DISTVNAME).zip
+ $(NOECHO) $(NOOP)
+
+$(DISTVNAME).zip : distdir
+ $(PREOP)
+ $(ZIP) $(ZIPFLAGS) $(DISTVNAME).zip $(DISTVNAME)
+ $(RM_RF) $(DISTVNAME)
+ $(POSTOP)
+
+shdist : distdir
+ $(PREOP)
+ $(SHAR) $(DISTVNAME) > $(DISTVNAME).shar
+ $(RM_RF) $(DISTVNAME)
+ $(POSTOP)
+
+
+# --- MakeMaker distdir section:
+distdir : metafile metafile_addtomanifest
+ $(RM_RF) $(DISTVNAME)
+ $(PERLRUN) "-MExtUtils::Manifest=manicopy,maniread" \
+ -e "manicopy(maniread(),'$(DISTVNAME)', '$(DIST_CP)');"
+
+
+
+# --- MakeMaker dist_test section:
+
+disttest : distdir
+ cd $(DISTVNAME) && $(ABSPERLRUN) Makefile.PL
+ cd $(DISTVNAME) && $(MAKE) $(PASTHRU)
+ cd $(DISTVNAME) && $(MAKE) test $(PASTHRU)
+
+
+# --- MakeMaker dist_ci section:
+
+ci :
+ $(PERLRUN) "-MExtUtils::Manifest=maniread" \
+ -e "@all = keys %{ maniread() };" \
+ -e "print(qq{Executing $(CI) @all\n}); system(qq{$(CI) @all});" \
+ -e "print(qq{Executing $(RCS_LABEL) ...\n}); system(qq{$(RCS_LABEL) @all});"
+
+
+# --- MakeMaker install section:
+
+install :: all pure_install doc_install
+
+install_perl :: all pure_perl_install doc_perl_install
+
+install_site :: all pure_site_install doc_site_install
+
+install_vendor :: all pure_vendor_install doc_vendor_install
+
+pure_install :: pure_$(INSTALLDIRS)_install
+
+doc_install :: doc_$(INSTALLDIRS)_install
+
+pure__install : pure_site_install
+ $(NOECHO) $(ECHO) INSTALLDIRS not defined, defaulting to INSTALLDIRS=site
+
+doc__install : doc_site_install
+ $(NOECHO) $(ECHO) INSTALLDIRS not defined, defaulting to INSTALLDIRS=site
+
+pure_perl_install ::
+ $(NOECHO) $(MOD_INSTALL) \
+ read $(PERL_ARCHLIB)/auto/$(FULLEXT)/.packlist \
+ write $(DESTINSTALLARCHLIB)/auto/$(FULLEXT)/.packlist \
+ $(INST_LIB) $(DESTINSTALLPRIVLIB) \
+ $(INST_ARCHLIB) $(DESTINSTALLARCHLIB) \
+ $(INST_BIN) $(DESTINSTALLBIN) \
+ $(INST_SCRIPT) $(DESTINSTALLSCRIPT) \
+ $(INST_MAN1DIR) $(DESTINSTALLMAN1DIR) \
+ $(INST_MAN3DIR) $(DESTINSTALLMAN3DIR)
+ $(NOECHO) $(WARN_IF_OLD_PACKLIST) \
+ $(SITEARCHEXP)/auto/$(FULLEXT)
+
+
+pure_site_install ::
+ $(NOECHO) $(MOD_INSTALL) \
+ read $(SITEARCHEXP)/auto/$(FULLEXT)/.packlist \
+ write $(DESTINSTALLSITEARCH)/auto/$(FULLEXT)/.packlist \
+ $(INST_LIB) $(DESTINSTALLSITELIB) \
+ $(INST_ARCHLIB) $(DESTINSTALLSITEARCH) \
+ $(INST_BIN) $(DESTINSTALLSITEBIN) \
+ $(INST_SCRIPT) $(DESTINSTALLSCRIPT) \
+ $(INST_MAN1DIR) $(DESTINSTALLSITEMAN1DIR) \
+ $(INST_MAN3DIR) $(DESTINSTALLSITEMAN3DIR)
+ $(NOECHO) $(WARN_IF_OLD_PACKLIST) \
+ $(PERL_ARCHLIB)/auto/$(FULLEXT)
+
+pure_vendor_install ::
+ $(NOECHO) $(MOD_INSTALL) \
+ read $(VENDORARCHEXP)/auto/$(FULLEXT)/.packlist \
+ write $(DESTINSTALLVENDORARCH)/auto/$(FULLEXT)/.packlist \
+ $(INST_LIB) $(DESTINSTALLVENDORLIB) \
+ $(INST_ARCHLIB) $(DESTINSTALLVENDORARCH) \
+ $(INST_BIN) $(DESTINSTALLVENDORBIN) \
+ $(INST_SCRIPT) $(DESTINSTALLSCRIPT) \
+ $(INST_MAN1DIR) $(DESTINSTALLVENDORMAN1DIR) \
+ $(INST_MAN3DIR) $(DESTINSTALLVENDORMAN3DIR)
+
+doc_perl_install ::
+ $(NOECHO) $(ECHO) Appending installation info to $(DESTINSTALLARCHLIB)/perllocal.pod
+ -$(NOECHO) $(MKPATH) $(DESTINSTALLARCHLIB)
+ -$(NOECHO) $(DOC_INSTALL) \
+ "Module" "$(NAME)" \
+ "installed into" "$(INSTALLPRIVLIB)" \
+ LINKTYPE "$(LINKTYPE)" \
+ VERSION "$(VERSION)" \
+ EXE_FILES "$(EXE_FILES)" \
+ >> $(DESTINSTALLARCHLIB)/perllocal.pod
+
+doc_site_install ::
+ $(NOECHO) $(ECHO) Appending installation info to $(DESTINSTALLARCHLIB)/perllocal.pod
+ -$(NOECHO) $(MKPATH) $(DESTINSTALLARCHLIB)
+ -$(NOECHO) $(DOC_INSTALL) \
+ "Module" "$(NAME)" \
+ "installed into" "$(INSTALLSITELIB)" \
+ LINKTYPE "$(LINKTYPE)" \
+ VERSION "$(VERSION)" \
+ EXE_FILES "$(EXE_FILES)" \
+ >> $(DESTINSTALLARCHLIB)/perllocal.pod
+
+doc_vendor_install ::
+ $(NOECHO) $(ECHO) Appending installation info to $(DESTINSTALLARCHLIB)/perllocal.pod
+ -$(NOECHO) $(MKPATH) $(DESTINSTALLARCHLIB)
+ -$(NOECHO) $(DOC_INSTALL) \
+ "Module" "$(NAME)" \
+ "installed into" "$(INSTALLVENDORLIB)" \
+ LINKTYPE "$(LINKTYPE)" \
+ VERSION "$(VERSION)" \
+ EXE_FILES "$(EXE_FILES)" \
+ >> $(DESTINSTALLARCHLIB)/perllocal.pod
+
+
+uninstall :: uninstall_from_$(INSTALLDIRS)dirs
+
+uninstall_from_perldirs ::
+ $(NOECHO) $(UNINSTALL) $(PERL_ARCHLIB)/auto/$(FULLEXT)/.packlist
+
+uninstall_from_sitedirs ::
+ $(NOECHO) $(UNINSTALL) $(SITEARCHEXP)/auto/$(FULLEXT)/.packlist
+
+uninstall_from_vendordirs ::
+ $(NOECHO) $(UNINSTALL) $(VENDORARCHEXP)/auto/$(FULLEXT)/.packlist
+
+
+# --- MakeMaker force section:
+# Phony target to force checking subdirectories.
+FORCE:
+ $(NOECHO) $(NOOP)
+
+
+# --- MakeMaker perldepend section:
+
+
+# --- MakeMaker makefile section:
+
+# We take a very conservative approach here, but it's worth it.
+# We move Makefile to Makefile.old here to avoid gnu make looping.
+$(FIRST_MAKEFILE) : Makefile.PL $(CONFIGDEP)
+ $(NOECHO) $(ECHO) "Makefile out-of-date with respect to $?"
+ $(NOECHO) $(ECHO) "Cleaning current config before rebuilding Makefile..."
+ $(NOECHO) $(RM_F) $(MAKEFILE_OLD)
+ $(NOECHO) $(MV) $(FIRST_MAKEFILE) $(MAKEFILE_OLD)
+ -$(MAKE) -f $(MAKEFILE_OLD) clean $(DEV_NULL) || $(NOOP)
+ $(PERLRUN) Makefile.PL
+ $(NOECHO) $(ECHO) "==> Your Makefile has been rebuilt. <=="
+ $(NOECHO) $(ECHO) "==> Please rerun the make command. <=="
+ false
+
+
+
+# --- MakeMaker staticmake section:
+
+# --- MakeMaker makeaperl section ---
+MAP_TARGET = perl
+FULLPERL = /usr/local/bin/perl
+
+$(MAP_TARGET) :: static $(MAKE_APERL_FILE)
+ $(MAKE) -f $(MAKE_APERL_FILE) $@
+
+$(MAKE_APERL_FILE) : $(FIRST_MAKEFILE)
+ $(NOECHO) $(ECHO) Writing \"$(MAKE_APERL_FILE)\" for this $(MAP_TARGET)
+ $(NOECHO) $(PERLRUNINST) \
+ Makefile.PL DIR= \
+ MAKEFILE=$(MAKE_APERL_FILE) LINKTYPE=static \
+ MAKEAPERL=1 NORECURS=1 CCCDLFLAGS=
+
+
+# --- MakeMaker test section:
+
+TEST_VERBOSE=0
+TEST_TYPE=test_$(LINKTYPE)
+TEST_FILE = test.pl
+TEST_FILES = t/*.t
+TESTDB_SW = -d
+
+testdb :: testdb_$(LINKTYPE)
+
+test :: $(TEST_TYPE)
+
+test_dynamic :: pure_all
+ PERL_DL_NONLAZY=1 $(FULLPERLRUN) "-MExtUtils::Command::MM" "-e" "test_harness($(TEST_VERBOSE), '$(INST_LIB)', '$(INST_ARCHLIB)')" $(TEST_FILES)
+
+testdb_dynamic :: pure_all
+ PERL_DL_NONLAZY=1 $(FULLPERLRUN) $(TESTDB_SW) "-I$(INST_LIB)" "-I$(INST_ARCHLIB)" $(TEST_FILE)
+
+test_ : test_dynamic
+
+test_static :: test_dynamic
+testdb_static :: testdb_dynamic
+
+
+# --- MakeMaker ppd section:
+# Creates a PPD (Perl Package Description) for a binary distribution.
+ppd:
+ $(NOECHO) $(ECHO) '' > $(DISTNAME).ppd
+ $(NOECHO) $(ECHO) ' $(DISTNAME)' >> $(DISTNAME).ppd
+ $(NOECHO) $(ECHO) ' A simple way to make efficient sort subs' >> $(DISTNAME).ppd
+ $(NOECHO) $(ECHO) ' Uri Guttman <uri@cpan.org>' >> $(DISTNAME).ppd
+ $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd
+ $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd
+ $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd
+ $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd
+ $(NOECHO) $(ECHO) ' ' >> $(DISTNAME).ppd
+ $(NOECHO) $(ECHO) '' >> $(DISTNAME).ppd
+
+
+# --- MakeMaker pm_to_blib section:
+
+pm_to_blib: $(TO_INST_PM)
+ $(NOECHO) $(PERLRUN) -MExtUtils::Install -e 'pm_to_blib({@ARGV}, '\''$(INST_LIB)/auto'\'', '\''$(PM_FILTER)'\'')'\
+ Sort/Maker.pm $(INST_LIB)/Sort/Maker.pm
+ $(NOECHO) $(TOUCH) $@
+
+# --- MakeMaker selfdocument section:
+
+
+# --- MakeMaker postamble section:
+
+
+# End.
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..d8d6b30
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,12 @@
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+ 'NAME' => 'Sort',
+ 'DISTNAME' => 'Sort-Maker',
+ 'VERSION_FROM' => 'Sort/Maker.pm', # finds $VERSION
+ 'PREREQ_PM' => {}, # e.g., Module::Name => 1.1
+ ($] >= 5.005 ? ## Add these new keywords supported since 5.005
+ (ABSTRACT_FROM => 'Sort/Maker.pm', # retrieve abstract from module
+ AUTHOR => 'Uri Guttman ') : ()),
+);
diff --git a/README b/README
new file mode 100644
index 0000000..eca556c
--- /dev/null
+++ b/README
@@ -0,0 +1,28 @@
+Sort/Maker version 0.06
+=======================
+
+This module has two main goals: to make it easy to create correct sort
+functions, and to make it simple to select the optimum sorting algorithm
+for the number of items to be sorted. Sort::Maker generates complete
+sort subroutines in one of four styles, plain, orcish manouver,
+Schwartzian Transform and the Guttman-Rosler Transform. You can also get
+the source for a sort sub you create via the sorter_source call.
+
+INSTALLATION
+
+To install this module type the following:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+DEPENDENCIES
+
+None.
+
+COPYRIGHT AND LICENCE
+
+Licensed the same as Perl.
+
+Copyright (C) 2004-2006 Uri Guttman uri@sysarch.com
diff --git a/Sort/Maker.pm b/Sort/Maker.pm
new file mode 100644
index 0000000..a6084a4
--- /dev/null
+++ b/Sort/Maker.pm
@@ -0,0 +1,1789 @@
+package Sort::Maker;
+
+use strict;
+use base qw(Exporter);
+
+use Data::Dumper ;
+
+our @EXPORT = qw( make_sorter );
+our %EXPORT_TAGS = ( 'all' => [ qw( sorter_source ), @EXPORT ] );
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our $VERSION = '0.06';
+
+
+# get integer and float sizes endian order
+
+my $FLOAT_LEN = length pack "d", 1 ;
+my $INT_LEN = length pack "N", 1 ;
+my $INT_BIT_LEN = $INT_LEN * 8 ;
+my $IS_BIG_ENDIAN = pack('N', 1) eq pack('L', 1) ;
+
+my @boolean_attrs = qw(
+ ascending
+ descending
+ case
+ no_case
+ signed
+ unsigned
+ signed_float
+ unsigned_float
+ varying
+ closure
+) ;
+
+my @value_attrs = qw(
+ fixed
+) ;
+
+my @grt_num_attrs = qw(
+ signed
+ unsigned
+ signed_float
+ unsigned_float
+) ;
+
+my @grt_string_attrs = qw(
+ varying
+ fixed
+) ;
+
+# these key attributes set are mutually exclusive
+# only one can be set in the defaults or in any given key
+
+my @mutex_attrs = (
+ [qw(case no_case)],
+ [qw(ascending descending)],
+ \@grt_num_attrs,
+ \@grt_string_attrs,
+) ;
+
+
+# code can only be an attribute and not a default attribute argument
+
+my %is_boolean_attr = map { $_ => 1 } @boolean_attrs ;
+my %is_value_attr = map { $_ => 1 } @value_attrs, 'code' ;
+
+my @boolean_args = qw(
+ ref_in
+ ref_out
+ string_data
+) ;
+
+my @value_args = qw(
+ name
+ init_code
+) ;
+
+# all the attributes can be set with defaults
+
+my %is_boolean_arg = map { $_ => 1 } @boolean_args, @boolean_attrs ;
+my %is_value_arg = map { $_ => 1 } @value_args, @value_attrs ;
+
+my @key_types = qw(
+ string
+ number
+) ;
+
+my %is_key_arg = map { $_ => 1 } @key_types ;
+
+my %sort_makers = (
+
+ plain => \&_make_plain_sort,
+ orcish => \&_make_orcish_sort,
+ ST => \&_make_ST_sort,
+ GRT => \&_make_GRT_sort,
+) ;
+
+my %is_arg = ( %is_key_arg, %sort_makers, %is_value_arg, %is_boolean_arg ) ;
+
+my %sources ;
+
+# this is a file lexical so the WARN handler sub can see it.
+
+my $eval_warnings = '' ;
+
+sub make_sorter {
+
+# clear any leftover errors
+
+ $@ = '' ;
+
+# process @_ without copying it (&sub with no args)
+
+ my( $options, $keys, $closures ) = &_process_arguments ;
+ return unless $keys ;
+
+ my @closures = _get_extractor_code( $options, $keys ) ;
+
+ return if $@ ;
+
+# get the sort maker for this style and build the sorter
+
+ my $sort_maker = $sort_makers{ $options->{style} } ;
+ my $source = $sort_maker->( $options, $keys ) ;
+ return unless $source ;
+
+# prepend code to access any closures
+
+ if ( @closures ) {
+
+ my $closure_text = join '', map <{style}.
+Check the key extraction code for errors.
+
+$source
+$eval_warnings
+$@
+ERR
+
+# install the sorter sub in the caller's package if a name was set
+
+ if ( my $name = $options->{name} ) {
+
+ no strict 'refs' ;
+
+ my $package = (caller())[0] ;
+
+ *{"${package}::$name"} = $sorter ;
+ }
+
+ return $sorter ;
+}
+
+sub _process_arguments {
+
+ my( %options, @keys ) ;
+
+ while( @_ ) {
+
+ my $opt = shift ;
+
+ if ( $sort_makers{ $opt } ) {
+
+ $@ =
+ "make_sorter: Style was already set to '$options{ style }'",
+ return if $options{ style } ;
+
+# handle optional boolean => 1
+ shift if @_ && $_[0] eq '1' ;
+ $options{ style } = $opt ;
+ $options{ $opt } = 1 ;
+
+ next ;
+ }
+
+ if ( $is_boolean_arg{ $opt } ) {
+
+# handle optional boolean => 1
+ shift if @_ && $_[0] eq '1' ;
+ $options{ $opt } = 1 ;
+ next ;
+ }
+
+ if ( $is_value_arg{ $opt } ) {
+
+ $@ = "make_sorter: No value for argument '$opt'\n",
+ return unless @_ ;
+
+ $options{ $opt } = shift ;
+ next ;
+ }
+
+ if ( $is_key_arg{ $opt } ) {
+
+ my $key_desc = $_[0] ;
+
+# if we have no key value or it is an option, we just have a single key.
+
+ if ( !defined( $key_desc ) || $is_arg{ $key_desc } ) {
+
+ push( @keys, {
+ type => $opt,
+ }
+ ) ;
+
+ next ;
+ }
+
+# if we have a hash ref for the value, it is the description for this key
+
+ if( ref $key_desc eq 'HASH' ) {
+
+ shift @_ ;
+ $key_desc->{type} = $opt ;
+ push( @keys, $key_desc ) ;
+ next ;
+ }
+
+# if we have an array ref for the value, it is the description for this key
+
+ if( ref $key_desc eq 'ARRAY' ) {
+
+ $key_desc = _process_array_attrs(@{$key_desc}) ;
+ return unless $key_desc ;
+
+ shift @_ ;
+ $key_desc->{type} = $opt ;
+ push( @keys, $key_desc ) ;
+ next ;
+ }
+
+# not a hash ref or an option/key so it must be code for the key
+
+ shift ;
+ push( @keys, {
+ type => $opt,
+ code => $key_desc,
+ }
+ ) ;
+ next ;
+ }
+
+ $@ = "make_sorter: Unknown option or key '$opt'\n" ;
+ return ;
+ }
+
+ unless( @keys ) {
+ $@ = 'make_sorter: No keys specified' ;
+ return ;
+ }
+
+ unless( $options{style} ) {
+ $@ = 'make_sorter: No sort style selected' ;
+ return ;
+ }
+
+ return unless _process_defaults( \%options, \@keys ) ;
+
+ return( \%options, \@keys ) ;
+}
+
+sub _process_defaults {
+
+ my( $opts, $keys ) = @_ ;
+
+ return if _has_mutex_attrs( $opts, 'defaults have' ) ;
+
+ $opts->{init_code} ||= '' ;
+
+ foreach my $key ( @{$keys} ) {
+
+ return if _has_mutex_attrs( $key, 'key has' ) ;
+
+# set descending if it is not ascending and the default is descending.
+
+ $key->{'descending'} ||=
+ !$key->{'ascending'} && $opts->{'descending'} ;
+
+# set no_case if it is not case and the default is no_case.
+
+ $key->{'no_case'} ||=
+ !$key->{'case'} && $opts->{'no_case'} ;
+
+# handle GRT default attrs, both number and string
+# don't use the default if an attribute is set in the key
+
+ unless( grep( $key->{$_}, @grt_num_attrs ) ) {
+
+ @{$key}{@grt_num_attrs} = @{$opts}{@grt_num_attrs} ;
+ }
+
+ unless( grep( $key->{$_}, @grt_string_attrs ) ) {
+
+ @{$key}{@grt_string_attrs} =
+ @{$opts}{@grt_string_attrs} ;
+ }
+ }
+
+ return 1 ;
+}
+
+
+sub _get_extractor_code {
+
+ my( $opts, $keys ) = @_ ;
+
+ my( @closures, $deparser ) ;
+
+ foreach my $key ( @{$keys} ) {
+
+ my $extract_code = $key->{code} ;
+
+# default extract code is $_
+
+ unless( $extract_code ) {
+
+ $key->{code} = '$_' ;
+ next ;
+ }
+
+ my $extractor_type = ref $extract_code ;
+
+# leave the extractor code alone if it is a string
+
+ next unless $extractor_type ;
+
+# wrap regexes in m()
+
+ if( $extractor_type eq 'Regexp' ) {
+
+ $key->{code} = "m($extract_code)" ;
+ next ;
+ }
+
+# return an error if it is not a CODE ref
+
+ unless( $extractor_type eq 'CODE' ) {
+
+ $@ = "$extract_code is not a CODE or Regexp reference" ;
+ return ;
+ }
+
+# must be a code reference
+# see if it is a closure
+
+ if ( $opts->{closure} || $key->{closure} ) {
+
+# generate the code that will call this closure
+
+ my $n = @closures ;
+ $key->{code} = "\$closure$n->()" ;
+
+#print "CODE $key->{code}\n" ;
+
+# copy the closure so we can process them later
+
+ push @closures, $extract_code ;
+ next ;
+ }
+
+# Otherwise, try to decompile the code ref with B::Deparse...
+
+ unless( require B::Deparse ) {
+
+ $@ = <new("-p", "-sC");
+
+ my $source = eval { $deparser->coderef2text( $extract_code ) } ;
+
+ unless( $source ) {
+
+ $@ = "Can't use [$extract_code] as key extractor";
+ return ;
+ }
+
+ #print "S [$source]\n" ;
+
+# use just the juicy pulp inside the braces...
+
+ $key->{code} = "do $source" ;
+ }
+
+ return @closures ;
+}
+
+
+# this is used to check for any mutually exclusive attribute in
+# defaults or keys
+
+sub _has_mutex_attrs {
+
+ my( $href, $name ) = @_ ;
+
+ foreach my $mutex ( @mutex_attrs ) {
+
+ my @bad_attrs = grep $href->{$_}, @{$mutex} ;
+
+ next if @bad_attrs <= 1 ;
+
+ $@ = "make_sorter: Key attribute conflict: '$name @bad_attrs'";
+ return 1 ;
+ }
+
+ return ;
+}
+
+sub _process_array_attrs {
+
+ my( @attrs ) = @_ ;
+
+ my $desc ;
+
+ while( @attrs ) {
+
+ my $attr = shift @attrs ;
+
+#print "ATTR $attr\n" ;
+
+ if ( $is_boolean_attr{ $attr } ) {
+
+ shift @attrs if $attrs[0] eq '1' ;
+ $desc->{ $attr } = 1 ;
+ next ;
+ }
+
+ if ( $is_value_attr{ $attr } ) {
+
+ $@ = "make_sorter: No value for attribute '$attr'",
+ return unless @attrs ;
+
+ $desc->{ $attr } = shift @attrs ;
+ next ;
+ }
+
+ $@ = "make_sorter: Unknown attribute '$attr'" ;
+ return ;
+ }
+
+ return( $desc ) ;
+}
+
+sub _make_plain_sort {
+
+ my( $options, $keys ) = @_ ;
+
+ my( @plain_compares ) ;
+
+ foreach my $key ( @{$keys} ) {
+
+ my $plain_compare = <{descending} ;
+ $plain_compare =~ s/cmp/<=>/ if $key->{type} eq 'number' ;
+ $plain_compare =~ s/uc //g
+ unless $key->{type} eq 'string' && $key->{no_case} ;
+ $plain_compare =~ s/EXTRACT/$key->{code}/ ;
+
+ push( @plain_compares, $plain_compare ) ;
+ }
+
+# build the full compare block
+
+ my $compare_source = join "\t\t||\n", @plain_compares ;
+
+# handle the in/out as ref options
+
+ my $input = $options->{ref_in} ? '@{$_[0]}' : '@_' ;
+ my( $open_bracket, $close_bracket ) = $options->{ref_out} ?
+ qw( [ ] ) : ( '', '' ) ;
+
+ my $source = <{init_code}
+ $open_bracket
+ sort {
+$compare_source
+ } $input $close_bracket ;
+}
+SUB
+
+ return $source ;
+}
+
+sub _make_orcish_sort {
+
+ my( $options, $keys ) = @_ ;
+
+ my( @orcish_compares ) ;
+
+ my $orc_ind = '1' ;
+
+ foreach my $key ( @{$keys} ) {
+
+ my( $l, $r ) = $key->{descending} ? qw( $b $a ) : qw( $a $b ) ;
+
+ my $orcish_compare = <{descending} ;
+ $orcish_compare =~ s/cmp/<=>/ if $key->{type} eq 'number' ;
+ $orcish_compare =~ s/uc //g
+ unless $key->{type} eq 'string' && $key->{no_case} ;
+
+ $orcish_compare =~ s/EXTRACT/$key->{code}/g ;
+
+ push( @orcish_compares, $orcish_compare ) ;
+ }
+
+# build the full compare block
+
+ my $compare_source = join "\t\t||\n", @orcish_compares ;
+
+# handle the in/out as ref options
+
+ my $input = $options->{ref_in} ? '@{$_[0]}' : '@_' ;
+ my( $open_bracket, $close_bracket ) = $options->{ref_out} ?
+ qw( [ ] ) : ( '', '' ) ;
+
+ my $cache_dcl = join( ',', map "%or_cache$_", 1 .. @{$keys} ) ;
+
+ my $source = <{init_code}
+ my ( $cache_dcl ) ;
+
+ $open_bracket
+ sort {
+$compare_source
+ } $input $close_bracket ;
+}
+SUB
+
+ return $source ;
+}
+
+sub _make_ST_sort {
+
+ my( $options, $keys ) = @_ ;
+
+ my( @st_compares, @st_extracts ) ;
+ my $st_ind = '1' ;
+
+ foreach my $key ( @{$keys} ) {
+
+#print Dumper $key ;
+
+ my $st_compare = <[$st_ind] cmp \$b->[$st_ind]
+CMP
+
+ $st_compare =~ tr/ab/ba/ if $key->{descending} ;
+ $st_compare =~ s/cmp/<=>/ if $key->{type} eq 'number' ;
+
+ $st_ind++ ;
+
+ push( @st_compares, $st_compare ) ;
+
+ my $st_extract = <{type} eq 'string' && $key->{no_case} ;
+ $st_extract =~ s/EXTRACT/$key->{code}/ ;
+
+ chomp( $st_extract ) ;
+ push( @st_extracts, $st_extract ) ;
+ }
+
+# build the full compare block
+
+ my $compare_source = join "\t\t||\n", @st_compares ;
+
+# build the full code for the key extracts
+
+ my $extract_source = join ",\n", @st_extracts ;
+
+# handle the in/out as ref options
+
+ my $input = $options->{ref_in} ? '@{$_[0]}' : '@_' ;
+ my( $open_bracket, $close_bracket ) = $options->{ref_out} ?
+ qw( [ ] ) : ( '', '' ) ;
+
+ my $source = <{init_code}
+ return $open_bracket
+ map \$_->[0],
+ sort {
+$compare_source
+ }
+ map [ \$_,
+$extract_source
+ ], $input $close_bracket ;
+}
+SUB
+
+}
+
+sub _make_GRT_sort {
+
+ my( $options, $keys ) = @_ ;
+
+ my( $pack_format, @grt_extracts ) ;
+
+ my $init_code = $options->{init_code} ;
+
+# select the input as a list - either an array ref or plain @_
+
+ my $input = $options->{ref_in} ? '@{$_[0]}' : '@_' ;
+
+# use this to count keys so we can generate init_code for each key
+
+ my $key_ind = '0' ;
+
+ foreach my $key ( @{$keys} ) {
+
+#print Dumper $key ;
+
+ my( $key_pack_format, $grt_extract, $key_init_code ) =
+ $key->{type} eq 'number' ?
+ _make_GRT_number_key( $key ) :
+ _make_GRT_string_key( $key, $key_ind++ ) ;
+
+#print "[$key_pack_format] [$grt_extract] [$key_init_code]\n" ;
+
+ return unless $key_pack_format ;
+
+ $pack_format .= $key_pack_format ;
+
+ if ( $key_init_code ) {
+
+# fix generated init_code that scans input to use the proper input
+
+ $key_init_code =~ s/INPUT$/$input/m ;
+ $init_code .= $key_init_code ;
+ }
+
+ chomp( $grt_extract ) ;
+ push( @grt_extracts, $grt_extract ) ;
+ }
+
+############
+# pack the record index.
+# SKIP for 'string_data' attribute
+##########
+
+ $pack_format .= 'N' unless $options->{string_data} ;
+
+ my $extract_source = join ",\n", @grt_extracts ;
+ chomp( $extract_source ) ;
+
+# handle the in/out as ref options
+
+ my( $open_bracket, $close_bracket ) = $options->{ref_out} ?
+ qw( [ ] ) : ( '', '' ) ;
+
+
+ my $get_index_code = <{string_data} ? <{descending} ) {
+
+# negate the key values so they sort in descending order
+
+ $negate_code = '$val = -$val; ' ;
+
+# descending GRT number sorts must be signed to handle the negated values
+
+ $key->{signed} = 1 if delete $key->{unsigned} ;
+ $key->{signed_float} = 1 if delete $key->{unsigned_float} ;
+ }
+ else {
+
+ $negate_code = '' ;
+ }
+
+ if ( $key->{unsigned} ) {
+
+ $pack_format = 'N' ;
+ $val_code = '$val' ;
+ }
+ elsif ( $key->{signed} ) {
+
+# convert the signed integer to unsigned by flipping the sign bit
+
+ $pack_format = 'N' ;
+ $val_code = "\$val ^ (1 << ($INT_BIT_LEN - 1))"
+ }
+ elsif ( $key->{unsigned_float} ) {
+
+# pack into A format with a length of a float
+
+ $pack_format = "A$FLOAT_LEN" ;
+ $val_code = qq{ $FLOAT_PACK ^ "\\x80" } ;
+ }
+ else {
+
+# must be a signed float
+
+ $pack_format = "A$FLOAT_LEN" ;
+
+# debug code that can be put in to dump what is being packed.
+# print "V [\$val]\\n" ;
+# print unpack( 'H*', pack 'd', \$val ), "\\n" ;
+
+
+# only negate float numbers other than 0. in some odd cases a float 0
+# gets converted to a -0 (which is a legal ieee float) and the GRT
+# packs it as 0x80000.. instead of 0x00000....)
+
+# it happens on sparc and perl 5.6.1. it needs a math op (the tests
+# runs the gold sort which does <=> on it) and then negation for -0 to
+# show up. 5.8 on sparc is fine and all perl versions on intel are
+# fine
+
+# the 'signed float edge case descending' test in t/numbers.t
+# looks for this.
+
+ $negate_code =~ s/;/ if \$val;/ ;
+
+ $val_code = qq{ $FLOAT_PACK ^
+ ( \$val < 0 ? "$XOR_NEG" : "\\x80" )
+ } ;
+ }
+
+ my $grt_extract = <{code} ; $negate_code$val_code }
+CODE
+
+ return( $pack_format, $grt_extract, '' ) ;
+}
+
+sub _make_GRT_string_key {
+
+ my( $key, $key_ind ) = @_ ;
+
+ my( $init_code, $pack_format ) ;
+
+ if ( my $fix_len = $key->{fixed} ) {
+
+# create the xor string to invert the key for a descending sort.
+ $init_code = <{descending} ;
+ my \$_xor$key_ind = "\\xFF" x $fix_len ;
+CODE
+ $pack_format = "a$fix_len" ;
+
+ }
+ elsif ( $key->{varying} ) {
+
+# create the code to scan for the maximum length of the values for this key
+# the INPUT will be changed later to handle a list or a ref as input
+
+ $init_code = <{code} ; length \$val } INPUT
+ ) ;
+CODE
+
+# create the xor string to invert the key for a descending sort.
+
+ $init_code .= <{descending} ;
+ my \$_xor$key_ind = "\\xFF" x \$len$key_ind ;
+CODE
+
+# we pack as a null padded string. its length is in the
+
+ $pack_format = "a\${len$key_ind}" ;
+ }
+ else {
+
+# we can't sort plain (null terminated) strings in descending order
+
+ $@ = <{descending} ;
+make_sorter: A GRT descending string needs to select either the
+'fixed' or 'varying' attributes
+ERR
+
+ $pack_format = 'Z*' ;
+ }
+
+ my $descend_code = $key->{descending} ? " . '' ^ \$_xor$key_ind" : '' ;
+
+ my $grt_extract = <{no_case} ;
+ $grt_extract =~ s/EXTRACT/$key->{code}/ ;
+
+ return( $pack_format, $grt_extract, $init_code ) ;
+}
+
+sub sorter_source {
+
+ $sources{ +shift || '' } ;
+}
+
+1 ;
+
+__END__
+
+=head1 NAME
+
+Sort::Maker - A simple way to make efficient sort subs
+
+=head1 SYNOPSIS
+
+ use Sort::Maker ;
+
+ my $sorter = make_sorter( ... ) ;
+
+
+=head1 DESCRIPTION
+
+This module has two main goals: to make it easy to create correct sort
+functions, and to make it simple to select the optimum sorting
+algorithm for the number of items to be sorted. Sort::Maker generates
+complete sort subroutines in one of four styles, plain, orcish
+manouver, Schwartzian Transform and the Guttman-Rosler Transform. You
+can also get the source for a sort sub you create via the
+sorter_source call.
+
+=head1 C
+
+The sub C is exported by Sort::Maker. It makes a sort sub
+and returns a reference to it. You describe how you want it to sort
+its input by passing general options and key descriptions to
+C.
+
+=head2 Arguments to C
+
+There are two types of arguments, boolean and value. Boolean arguments
+can be set just with the option name and can optionally be followed by
+'1'. You can easily set multiple boolean general arguments with
+qw(). Value arguments must have a following value. Arguments can
+appear in any order but the key descriptions (see below) must appear
+in their sort order. The code examples below show various ways to set
+the various arguments.
+
+Arguments fall into four categories: selecting the style of the sort,
+key descriptions, setting defaults for key description attributes, and
+setting general flags and values. The following sections will describe
+the categories and their associated arguments.
+
+=head2 Sort Style
+
+The style of the sort to be made is selected by setting one of the
+following Boolean arguments. Only one may be set otherwise an error
+is reported (see below for error handling). Also see below for
+detailed descriptions of the supported sort styles.
+
+ plain
+ orcish
+ ST
+ GRT
+
+ # Make a plain sorter
+ my $plain_sorter = make_sorter( qw( plain ) ... ) ;
+
+ # Make an orcish manouevre sorter
+ my $orcish_sorter = make_sorter( orcish => 1 ... ) ;
+
+ # Make a Schwartzian Transform sorter
+ my $st_sorter = make_sorter( 'ST', 1, ... ) ;
+
+ # Make a GRT sort
+ my $GRT = make_sorter( 'GRT', ... ) ;
+
+=head2 Key Attribute Defaults
+
+The following arguments set defaults for the all of the keys'
+attributes. These default values can be overridden in any individual
+key. Only one of the attributes in each of the groups below can be
+set as defaults or for any given key. If more than one attribute in
+each group is set, then C will return an error. The
+attribute that is the default for each group is marked. See below for
+details on key attributes.
+
+ ascending (default)
+ descending
+
+ case (default)
+ no_case
+
+ signed
+ unsigned
+ signed_float (default)
+ unsigned_float
+
+ fixed
+ varying
+
+=head2 General Options
+
+These arguments set general options that apply to how the generated
+sorter interacts with the outside world.
+
+=head3 C
+
+This is a value option which exports the generated sort sub to that
+name. The call to C must be run to install the named
+named function before it is called. You should still check the result
+of C to see if an error occurred (it returns undef).
+
+ my $sorter = make_sorter( name => 'sort_func', ... ) ;
+ die "make_sorter: $@" unless $sorter ;
+
+ ...
+
+ @sorted = sort_func @unsorted ;
+
+=head3 C
+
+This boolean arguments specifies that the input to and output from the
+sort sub will be array references. C makes the sorter only
+take as input a single array reference (which contains the unsorted
+records). C makes the sorter only return a single array
+reference (which contains the sorted records). You can set both of
+these options in a sorter.
+
+Note: This does not affect key extraction code which still gets each
+record in C<$_>. It only modifies the I/O of the generated sorter.
+
+ # input records are in an array reference
+ my $sorter = make_sorter( qw( ref_in ), ... ) ;
+ @sorted_array = $sorter->( \@unsorted_input ) ;
+
+ # sorted output records are in an array reference
+ my $sorter = make_sorter( ref_out => 1, ... ) ;
+ $sorted_array_ref = $sorter->( @unsorted_input ) ;
+
+ # input and output records are in array references
+ my $sorter = make_sorter( qw( ref_in ref_out ), ... ) ;
+ $sorted_array_ref = $sorter->( \@unsorted_input ) ;
+
+=head3 C
+
+This boolean argument specifies that the input records will be plain
+text strings with no null (0x0) bytes in them. It is only valid for
+use with the GRT and it is ignored for the other sort styles. It tells
+the GRT that it can put the record directly into the string cache and
+it will be separated from the packed keys with a null byte (hence that
+restriction). This is an optimization that can run slightly faster
+than the normal index sorting done with the GRT. Run this to see the
+benchmark results.
+
+ perl t/string_data.t -bench
+
+=head3 C
+
+This value argument is code that will be put into the beginning of the
+generated sorter subroutine. It is meant to be used to declare lexical
+variables that the extraction code can use. Normally different
+extraction code have no way to share common code. By declaring
+lexicals with the C option, some key extraction code
+can save data there for use by another key. This is useful if you have
+two (or more) keys that share a complex piece of code such as
+accessing a deep value in a record tree.
+
+For example, suppose the input record is an array of arrays of hashes
+of strings and the string has 2 keys that need to be grabbed by a
+regex. The string is a string key, a ':' and a number key. So the
+common part of the key extraction is:
+
+ $_->[0][0]{a}
+
+And the make_sorter call is:
+
+ my $sorter = make_sorter(
+ 'ST',
+ init_code => 'my( $str, $num ) ;',
+ string => 'do{( $str, $num ) =
+ $_->[0][0]{a} =~ /^(\w+):(\d+)$/; $str}',
+ number => '$num'
+ ) ;
+
+In the above code both keys are extracted in the first key extraction
+code and the number key is saved in C<$num>. The second key extraction
+code just uses that saved value.
+
+Note that C is only useful in the ST and GRT sort styles as
+they process all the keys of a record at one time and can use
+variables declared in C to transfer data to later keys. The
+plain and orcish sorts may not process a later key at the same time as
+an earlier key (that only happens when the earlier key is compared to
+an equal key). Also for C to be a win, the data set must be
+large enough and the work to extract the keys must be hard enough for
+the savings to be noticed. The test init_code.t shows some examples
+and you can see the speedup when you run:
+
+ perl t/init_code.t -bench
+
+=head2 Key Description Arguments
+
+Sorting data requires that records be compared in some way so they can
+be put into a proper sequence. The parts of the records that actually
+get compared are called its keys. In the simplest case the entire
+record is the key, as when you sort a list of numbers or file
+names. But in many cases the keys are embedded in the full record and
+they need to be extracted before they can be used in comparisons.
+Sort::Maker uses key descriptions that extract the key from the
+record, and optional other attributes that will help optimize the
+sorting operation. This section will explain how to pass key
+description arguments to the make_sorter subroutine and what the
+various attributes mean and how to best use them.
+
+The generated sorter will sort the records according to the order of
+the key arguments. The first key is used to compare a pair of records
+and if they are deemed equal, then the next key is examined. This happens
+until the records are given an ordering or you run out of keys and the
+records are deemed equal in sort order. Key descriptions can be mixed
+with the other arguments which can appear in any order and anywhere in
+the argument list, but the keys themselves must be in the desired
+order.
+
+A key argument is either 'string' or 'number' followed by optional
+attributes. The key type sets the way that the key is compared
+(e.g. using 'cmp' or '<=>'). All key attributes can be set from the
+default values in the global arguments or set in each individual key
+description.
+
+There are 4 ways to provide attributes to a key:
+
+=head3 No attributes
+
+A key argument which is either at the end of the argument list or is
+followed by a valid keyword token has no explict attributes. This key
+will use the default attributes. In both of these examples, a default
+attribute was set and used by the key description which is just a
+single key argument.
+
+ # sort the record as a single number in descending order
+ my $sorter = make_sorter( qw( plain number descending ) ) ;
+
+ # sort the record as a case sensitive string
+ my $sorter = make_sorter( qw( plain case string ) ) ;
+
+ # sort the record as a single number in ascending order
+ my $sorter = make_sorter( qw( ST number ) ) ;
+
+=head3 Only Code as a Value
+
+A key argument which is followed by a scalar value which is not a
+valid keyword token, will use that scalar value as its key extraction
+code. See below for more on key extraction code.
+
+ # sort by the first (optionally signed) number matched
+ my $sorter = make_sorter( qw( plain number /([+-]?\d+)/ ) ) ;
+
+ # string sort by the 3rd field in the input records (array refs)
+ my $sorter = make_sorter( 'ST', string => '$_->[2]' ) ;
+
+=head3 An Array Reference
+
+A key argument which is followed by an array reference will parse that
+array for its description attributes. As with the general boolean
+arguments, any boolean attribute can be optionally followed by a
+'1'. Value attributes must be followed by their value.
+
+ # another way to specify the same sort as above
+ # sort by the first (optionally signed) number matched
+
+ my $sorter = make_sorter(
+ qw( plain ),
+ number => [
+ code => '/(\d+)/',
+ 'descending',
+ ],
+ ) ;
+
+ # same sort but for the GRT which uses the 'unsigned'
+ # attribute to optimize the sort.
+
+ my $sorter = make_sorter(
+ qw( GRT ),
+ number => [
+ qw( descending unsigned ),
+ code => '/(\d+)/',
+ ],
+ ) ;
+
+=head3 A Hash Reference
+
+A key argument which is followed a hash reference will use that hash
+as its description attributes. Any boolean attribute in the hash must
+have a value of '1'. Value attributes must be followed by their
+value.
+
+ # another way to specify the same sort as above
+ # sort by the first (optionally signed) number matched
+
+ my $sorter = make_sorter(
+ qw( plain ),
+ number => {
+ code => '/(\d+)/',
+ descending => 1,
+ },
+ ) ;
+
+ # a multi-key sort. the first key is a descending unsigned
+ # integer and the second is a string padded to 10 characters
+
+ my $sorter = make_sorter(
+ qw( GRT ),
+ number => {
+ code => '/(\d+)/',
+ descending => 1,
+ unsigned => 1,
+ },
+ string => {
+ code => '/FOO<(\w+)>/',
+ fixed => 10,
+ },
+ ) ;
+
+=head2 Key Description Attributes
+
+What follows are the attributes for key descriptions. Most use
+the default values passed in the arguments to C.
+
+=head3 C
+
+This value attribute is the code that will be used to extract a key
+from the input record. It can be a string of Perl code, a qr// regular
+expression (Regexp reference) or an anonymous sub (CODE reference)
+that operates on $_ and extracts a value. The code will be wrapped in
+a do{} block and called in a list context so that regular expressions
+can just use () to grab a key value. The code defaults to C<$_> which
+means the entire record is used for this key. You can't set the
+default for code (unlike all the other key attributes). See the
+section on Extraction Code for more.
+
+ # make an ST sort of the first number grabbed in descending order
+
+ my $sorter = make_sorter(
+ qw( ST ),
+ number => {
+ code => '/(\d+)/',
+ descending => 1,
+ },
+ ) ;
+
+=head3 C
+
+These two Boolean attributes control the sorting order for this
+key. If a key is marked as C (which is the initial default
+for all keys), then lower keys will sort before higher
+keys. C sorts have the higher keys sort before the lower
+keys. It is illegal to have both set in the defaults or in any key.
+
+ # sort by descending order of the first grabbed number
+ # and then sort in ascending order the first grabbed
+
+ my $sorter = make_sorter(
+ qw( ST descending ),
+ number => {
+ code => '/(\d+)/',
+ },
+ string => {
+ code => '/<(\w+)>/',
+ ascending => 1,
+ },
+ ) ;
+
+ # this will return undef and store an error in $@.
+ # you can't have both 'ascending' and 'descending' as defaults
+
+ my $sorter = make_sorter(
+ qw( ST ascending descending ),
+ number => {
+ code => '/(\d+)/',
+ descending => 1,
+ },
+ ) ;
+
+ # this will return undef and store an error in $@.
+ # you can't have both 'ascending' and 'descending' in a key
+
+ my $sorter = make_sorter(
+ qw( ST )
+ number => {
+ code => '/(\d+)/',
+ descending => 1,
+ ascending => 1,
+ },
+ ) ;
+
+=head3 C
+
+These two Boolean attributes control how 'string' keys handle case
+sensitivity. If a key is marked as C (which is the initial
+default for all keys), then keys will treat upper and lower case
+letters as different. If the key is marked as C then they
+are treated as equal. It is illegal to have both set in the defaults
+or in any key. Internally this uses the uc() function so you can use
+locale settings to affect string sorts.
+
+ # sort by the first grabbed word with no case
+ # and then sort the grabbed with case
+
+ my $sorter = make_sorter(
+ qw( ST no_case ),
+ string => {
+ code => '/(\w+)/',
+ },
+ string => {
+ code => '/<(\w+)>/',
+ case => 1,
+ },
+ ) ;
+
+ # this will return undef and store an error in $@.
+ # you can't have both 'case' and 'no_case' as defaults
+
+ my $sorter = make_sorter(
+ qw( ST no_case case ),
+ string => {
+ code => '/(\w+)/',
+ },
+ ) ;
+
+ # this will return undef and store an error in $@.
+ # you can't have both 'case' and 'no_case' in a key
+
+ my $sorter = make_sorter(
+ qw( ST )
+ string => {
+ code => '/(\w+)/',
+ no_case => 1,
+ case => 1,
+ },
+ ) ;
+
+=head3 C
+
+This Boolean attribute causes this key to use call its CODE reference
+to extract its value. This is useful if you need to access a lexical
+variable during the key extraction. A typical use would be if you have
+a sorting order stored in a lexical and need to access that from the
+extraction code. If you didn't set the C attribute for this
+key, the generated source (see Key Extraction) would not be able to
+see that lexical which will trigger a Perl compiling error in
+make_sorter.
+
+ my @months = qw(
+ January February March April May June
+ July August September October November December ) ;
+ my @month_jumble = qw(
+ February June October March January April
+ July November August December May September ) ;
+
+ my %month_to_num ;
+ @month_to_num{ @months } = 1 .. @months ;
+
+# this will fail to generate a sorter if 'closure' is removed
+# as %month_to_num will not be in scope to the eval inside sort_maker.
+
+ my $sorter = make_sorter(
+ 'closure',
+ number => sub { $month_to_num{$_} },
+ ) ;
+
+ my @sorted = $sorter->( @month_jumble ) ;
+
+
+=head3 C (GRT only)
+
+These Boolean attributes are only used by the GRT sort style. They are
+meant to describe the type of a number key so that the GRT can best
+process and cache the key's value. It is illegal to have more than one
+of them set in the defaults or in any key. See the section on GRT
+sorting for more.
+
+The C and C attributes mark this number key as an
+integer. The GRT does the least amount of work processing an unsigned
+integer and only slightly more work for a signed integer. It is worth
+using these attributes if a sort key is restricted to integers.
+
+The C (which is the normal default for all keys) and
+C attributes mark this number key as a float. The GRT
+does the less work processing an unsigned float then a signed float.
+It is worth using the C attribute if a sort key is
+restricted to non-negative values. The C attribute is
+supported to allow overriding defaults and to make it easier to
+auto-generate sorts.
+
+=head3 C (GRT only)
+
+These attributes are only used by the GRT sort style. They are used
+to describe the type of a string key so that the GRT can properly
+process and cache the key's value. It is illegal to have more than one
+of them set in the defaults or in any key. See the section on GRT
+sorting for more.
+
+C is a value attribute that marks this string key as always
+being this length. The extracted value will either be padded with null
+(0x0) bytes or truncated to the specified length (the value of
+C). The data in this key may have embedded null bytes (0x0) and
+may be sorted in descending order.
+
+C is a Boolean attribute marks this string key as being of
+varying lengths. The GRT sorter will do a scan of all of this key's
+values to find the maximum string length and then it pads all the
+extracted values to that length. The data in this key may have
+embedded null bytes (0x0) and may be sorted in descending order.
+
+=head2 Key Extraction Code
+
+Each input record must have its sort keys extracted from the data.
+This is the purpose of the 'code' attribute in key descriptions. The
+code has to operate on a record which is in C<$_> and it must return
+the key value. The code is executed in a list context so you can use
+grabs in m// to return the key. Note that only the first grab will be
+used but you shouldn't have more than one anyway. See the examples
+below.
+
+Code can be either a string, a qr// object (Regexp reference) or an
+anonymous sub (CODE reference).
+
+If qr// is used, the actual generated code will be m($qr) which works
+because qr// will interpolate to its string representation. The
+advantage of qr// over a string is that the qr// will be syntax
+checked at compile time while the string only later when the generated
+sorter is compiled by an eval.
+
+If a CODE reference is found, it is used to extract the key in the
+generated sorter. As with qr//, the advantage is that the extraction
+code is syntax checked at compile time and not runtime. Also the
+deparsed code is wrapped in a C block so you may use complex
+code to extract the key. In the default case a CODE reference will be
+deparsed by the B::Deparse module into Perl source. If the key has the
+C attribute set, the code will be called to extract the key.
+
+The following will generate sorters with exact same behavior:
+
+ $sorter = make_sorter( 'ST', string => '/(\w+)/' ) ;
+ $sorter = make_sorter( 'ST', string => qr/(\w+)/ ) ;
+ $sorter = make_sorter( 'ST', string => sub { /(\w+)/ } ) ;
+ $sorter = make_sorter( 'ST', 'closure', string => sub { /(\w+)/ } ) ;
+
+Extraction code for a key can be set in one of three ways.
+
+=head3 No explicit code
+
+If you don't pass any extraction code to a key, it will default to C<$_>
+which is the entire record. This is useful in certain cases such as in
+simple sorts where you are sorting the entire record.
+
+ # sort numerically and in reverse order
+ my $sorter = make_sorter( qw( plain number descending ) ;
+
+ # sort with case folding
+ my $sorter = make_sorter( qw( plain no_case string ) ;
+
+ # sort by file time stamp and then by name
+ my $sorter = make_sorter( 'ST', number => '-M', 'string' ) ;
+
+=head3 Code is the only key attribute
+
+In many cases you don't need to specify any specific key attributes (the
+normal or globally set defaults are fine) but you need extraction
+code. If the argument that follows a key type ( 'string' or 'number' )
+is not a valid keyword, it will be assumed to be the extraction code
+for that key.
+
+ # grab the first number string as the key
+ my $sorter = make_sorter( qw( plain number /(\d+)/ ) ) ;
+
+ # no_case string sort on the 3rd-5th chars of the 2nd array element
+ my $sorter = make_sorter(
+ plain => 1,
+ no_case => 1,
+ string => 'substr( $_->[1], 2, 3)'
+ ) ;
+
+=head3 Key needs specific attributes
+
+When the key needs to have its own specific attributes other than its
+code, you need to pass them in an ARRAY or HASH reference. This is
+mostly needed when there are multiple keys and the defaults are not
+correct for all the keys.
+
+ # string sort by the first 3 elements of the array record with
+ # different case requirements
+
+ my $sorter = make_sorter(
+ ST => 1,
+ string => {
+ code => '$_->[0]',
+ no_case => 1,
+ },
+ string => '$_->[1]',
+ string => {
+ code => '$_->[2]',
+ no_case => 1,
+ },
+ ) ;
+
+ # GRT sort with multiple unsigned integers and padded strings
+ # note that some keys use a hash ref and some an array ref
+ # the record is marked with key\d: sections
+ my $sorter = make_sorter(
+ GRT => 1,
+ descending => 1,
+ number => {
+ code => 'key1:(\d+)',
+ unsigned => 1,
+ },
+ number => [
+ code => 'key2:([+-]?\d+)',
+ qw( signed ascending ),
+ ],
+ string => [
+ code => 'key3:(\w{10})',
+ fixed => 1,
+ ascending => 1,
+ ],
+ # pad the extracted keys to 8 chars
+ string => {
+ code => 'key4:([A-Z]+)',
+ pad => 8,
+ },
+ ) ;
+
+=head1 Key Caching
+
+A good question to ask is "What speed advantages do you get from this
+module when all the sorts generated use Perl's internal sort function?"
+The sort function has a O( N * log N ) growth function which means that
+the amount of work done increases by that formula as N (the number of
+input records) increases. In a plain sort this means the the key
+extraction code is executed N * log N times when you only have N
+records. That can be a very large waste of cpu time. So the other three
+sort styles speed up the overall sort by only executing the extraction
+code N times by caching the extracted keys. How they cache the keys is
+their primary difference. To compare or study the actual code generated
+for the different sort styles, you can run make_sorter and just change
+the style. Then call sorter_source (not exported by default) and pass it
+the sort code reference returned by make_sorter. It will return the
+generated sort source.
+
+=head2 C
+
+Plain sorting doesn't do any key caching. It is fine for short input
+lists (see the Benchmark section) and also as a way to see how much CPU
+is saved when using one of the other styles.
+
+=head2 C
+
+The Orcish maneuvre (created by Joseph Hall) caches the extracted keys
+in a hash. It does this with code like this:
+
+ $cache{$a} ||= CODE($a) ;
+
+CODE is the extract code and it operates on a record in $a. If we have
+never seen this record before then the cache entry will be undef and the
+||= operator will assign the extracted key to that hash slot. The next
+time this record is seen in a comparison, the saved extracted key will
+be found in the hash and used. The name orcish comes from OR-cache.
+
+=head2 C
+
+The ST (Schwartzian Transform and popularized by Randal Schwartz) uses
+an anonymous array to store the record and its extracted keys. It
+first executes a map that creates an anonymous array:
+
+ map [ $_, CODE1( $_ ), CODE2( $_ ) ], @input
+
+The CODE's extract the set of keys from the record but only once per
+record so it is O(N). Now the sort function can just do the comparisons
+and it returns a list of sorted anonymous arrays.
+
+ sort {
+ $a->[1] cmp $b->[1]
+ ||
+ $a->[2] cmp $b->[2]
+ }
+
+Finally, we need to get back the original records which are in the first
+slot of the anonymous array:
+
+ map $_->[0]
+
+This is why the ST is known as a map/sort/map technique.
+
+=head2 C
+
+The Guttman-Rosler Transform (popularized by Uri Guttman and Larry
+Rosler) uses a string to cache the extracted keys as well as either
+the record or its index. It is also a map/sort/map technique but
+because its cache is a string, it can be sorted without any Perl level
+callback (the {} block passed to sort). This is a signifigant win
+since that callback is running O( N log N). But this speedup comes at
+a cost of complexity. You can't just join the keys into a string and
+properly sort them. Each key may need to be processed so that it will
+correctly sort in order and it doesn't interfere with other keys. That
+is why the GRT has several key attributes to enable it to properly and
+efficiently pack the sort keys into a single string. The following
+lists the GRT key attributes, when you need them and what key
+processing is done for each. Note that you can always enable the GRT
+specific attributes as they are just ignored by the other sort styles.
+
+The GRT gains its speed by using a single byte string to cache all of
+the extracted keys from a given input record. Packing keys into a
+string such that it will lexically sort the correct way requires some
+deep mojo and data munging. But that is why this module was written -
+to hide all that from the coder. Below are descriptions of how the
+various key types are packed and how to best use the GRT specific key
+attributes. Note: you can only use one of the GRT number or string
+attributes for any key. Setting more than one in either the defaults
+or in any given key is an error (a key's attribute can override a
+default choice).
+
+=head3 C
+
+The 'unsigned' Boolean attribute tells the GRT that this number key is a
+non-negative integer. This allows the GRT to just pack it into 4 bytes
+using the N format (network order - big endian). An integer packed this
+way will have its most significant bytes compared before its least
+signifigant bytes. This involves the least amount of key munging and so
+it is the most efficient way to sort numbers in the GRT.
+
+If you want this key to sort in descending order, then the key value is
+negated and normalized (see the 'signed' attribute) so there is no
+advantage to using 'unsigned'.
+
+=head3 C
+
+The 'signed' Boolean attribute tells the GRT that this number key is
+an integer. This allows the GRT to just pack it into 4 bytes using the
+N format (network order - big endian). The key value must first be
+normalized which will convert it to an unsigned integer but with the
+same ordering as a signed integer. This is simply done by inverting
+the sign (highest order) bit of the integer. As mentioned above, when
+sorting this key in descending order, the GRT just negates the key
+value.
+
+NOTE: In the GRT the signed and unsigned integer attributes only work
+on perl built with 32 bit integers. This is due to using the N format
+of pack which is specified to be 32 bits. A future version may support
+64 bit integers (anyone want to help?).
+
+=head3 C
+
+The 'unsigned_float' Boolean attribute tells the GRT that this number
+key is a non-negative floating point number. This allows the GRT to
+pack it into 8 bytes using the 'd' format. A float packed this way
+will have its most significant bytes compared before its least
+signifigant bytes.
+
+=head3 C
+
+The C Boolean attribute (which is the default for all
+number keys when using the GRT) tells the GRT that this number key is
+a floating point number. This allows the GRT to pack it into 8 bytes
+using the 'd' format. A float packed this way will have its most
+significant bytes compared before its least signifigant bytes. When
+processed this key will be normalized to an unsigned float similar to
+to the C to C conversion mentioned above.
+
+NOTE: The GRT only works with floats that are in the IEEE format for
+doubles. This includes most modern architectures including x86, sparc,
+powerpc, mips, etc. If the cpu doesn't have IEEE floats you can either
+use the integer attributes or select another sort style (all the
+others have no restriction on float formats).
+
+=head3 simple string.
+
+If a string key is being sorted in ascending order with the GRT and it
+doesn't have one of the GRT string attributes, it will be packed
+without any munging and a null (0x0) byte will be appended to it. This
+byte enables a shorter string to sort before longer ones that start
+with the shorter string.
+
+NOTE: You cannot sort strings in descending order in the GRT unless
+the key has either the 'fixed' or 'varying' attributes set. Also, if a
+string is being sorted in ascending order but has any null (0x0) bytes
+in it, the key must have one of those attributes set.
+
+=head3 C
+
+This value attribute tells the GRT to pack this key value as a fixed
+length string. The extracted value will either be padded with null
+(0x0) bytes or truncated to the specified length (the value of the
+C attribute). This means it can be packed into the cache string
+with no padding and no trailing null byte is needed. The key can
+contain any data including null (0x0) bytes. Data munging
+happens only if the key's sort order is descending. Then the key value is
+xor'ed with a same length string of 0xff bytes. This toggles each bit
+which allows for a lexical comparison but in the reverse order. This
+same bit inversion is used for descending varying strings.
+
+=head3 C
+
+This Boolean attribute tells the GRT that this key value is a varying length
+string and has no predetermined padding length. A prescan is done to
+determine the maximum string length for this key and that is used as the
+padding length. The rest is the same as with the 'fixed' attribute.
+
+=head2 C
+
+This sub (which can be exported) returns the source of a generated
+sort sub or the source of the last one that had an error. To get the
+source of an existing sort sub, pass it a reference to that sub (i.e.
+the reference returned from make_sorter). To get the source for a
+failed call to make_sorter, don't pass in any arguments.
+
+ my $sorter = make_sorter( ... ) ;
+ print sorter_source( $sorter ) ;
+
+ make_sorter( name => 'my_sorter', ... ) ;
+ print sorter_source( \&my_sorter ) ;
+
+ my $sorter = make_sorter( ... )
+ or die "make_sorter error: $@\n", sorter_source();
+
+If all you want is the generated source you can just do:
+
+ print sorter_source make_sorter( ... ) ;
+
+=head2 Error Handling
+
+When C detects an error (either bad arguments or when the
+generated sorter won't compile), it returns undef and set $@ to an
+error message. The error message will include the generated source and
+compiler and warning errors if the sorter didn't compile correctly.
+The test t/errors.t covers all the possible error messages. You can
+also retrieve the generated source after a compiling error by calling
+C.
+
+=head1 TESTS
+
+C uses a table of test configurations that can both run
+tests and benchmarks. Each test script is mostly a table that
+generates multiple versions of the sorters, generate sample data and
+compares the sorter results with a sort that is known to be good. If
+you run the scripts directly and with a -bench argument, then they
+generate the same sorter subs and benchmark them. This design ensures
+that benchmarks are running on correctly generated code and it makes
+it very easy to add more test and benchmark variations. The code that
+does all the work is in t/common.pl. Here is a typical test table
+entry:
+
+ {
+ skip => 0,
+ source => 0,
+ name => 'init_code',
+ gen => sub { rand_choice( @string_keys ) . ':' .
+ rand_choice( @number_keys ) },
+ gold => sub {
+ ($a =~ /^(\w+)/)[0] cmp ($b =~ /^(\w+)/)[0]
+ ||
+ ($a =~ /(\d+$)/)[0] <=> ($b =~ /(\d+$)/)[0]
+ },
+ args => [
+ init_code => 'my( $str, $num ) ;',
+ string => 'do{( $str, $num ) = /^(\w+):(\d+)$/; $str}',
+ number => '$num',
+ ],
+ },
+
+C is a boolean that causes this test/benchmark to be skipped.
+Setting C
+
+
+
Sorting´s critical operation is determining in which order to put pairs of elements of the data. The comparison can be as simple as finding whether two numbers are equal or which is greater than the other (or doing similar operations on strings), or it can be quite complex.
+
Simple sorting algorithms (bubble or insertion sorts) compare each element to each of the others repeatedly, so their complexity is O(N**N). Even with the triangle optimization ($x is equal to $x, and $x compared to $y is the negative of $y compared to $x), which reduces the function to O((N * (N-1))/2), the complexity is still O(N**N), as explained above.
+
But these algorithms have their uses. When N is very small, they can actually be faster than the other methods, because the O(1) and O(N) overhead of the advanced sorts may outweigh the O(N**2) behavior of the simple sorts. "Fancy algorithms are slow when N is small, and N is usually small. Fancy algorithms have big constants." [3] The really important cases, which are worth care in the coding, occur when N is large.
+
Advanced sorting methods repeatedly partition the records to be sorted into smaller sets, to reduce the total number of comparisons needed. Their complexity is O(N*logN), which can be much less than O(N**2) for sufficiently large values of N. These algorithms include `tree sort´, `shell sort´, and `quicksort´. [4]
+
Some specialized sort algorithms (such as `radix sort´) work by comparing pieces of numeric sortkeys, and can achieve linear complexity (O(N)) [5]. These methods are not general-purpose, so we will not address them further.
+
One property of sort algorithms is whether they are stable. A stable sort preserves the order in the sorted data of two elements that compare equal. Some sorting problems require stability. The simple sorting algorithms are generally stable; the advanced ones are not. We will show how to make Perl´s advanced sort behave stably if required.
+
An important sorting variation is when the original data elements can´t conveniently be moved around by the sort algorithm´s shuffling. So instead of sorting the elements directly, you sort their index numbers. You then use the sorted indexes to create a list of sorted elements. Some sort operators in other languages (APL comes to mind) simply return sorted indexes, and it is up to the programmer to use them correctly. We will show how to create an efficient Perl index sort and where it is useful.
+
Sortkeys
+
If you are sorting a set of scalar-valued elements where the comparison looks at the entire element, the sortkey is simply the entire element. More generally, the sortkey is based on some properties that are functions of all or part of the element. Such subkeys may be extracted from internal properties of parts of the element (fields) or derived from external properties of the element (such as the modification date of a file named by the element, which is quite expensive to retrieve from the file system).
+
To avoid repeated computation of the sortkeys, the sort process has to retain the association between records and their extracted or derived sortkeys. Sorting theory and algorithms usually ignore the cost of this association, as it is typically a constant factor of the comparison operation. But as we will see later, in the real world, removing that overhead or reducing it from O(N*logN) to O(N) is very valuable, especially as N grows.
+
Complex sortkeys can add tremendously to the overhead of each comparison. This occurs where the records have to be sorted by primary, secondary, and lower-order subkeys. This is also known as doing a subsort on the lower keys. Extracting and comparing complex sortkeys can be costly and error-prone.
+
No general-purpose implementation of a sort algorithm can efficiently support extracting and comparing different types of sortkeys. Therefore, most sort implementations provide a simple interface to call a sortsub -- a custom comparison subroutine which is passed two operands. These operands can be the records themselves, or references to or indexes of complex records. The comparison returns a negative, zero, or positive value, depending on the ordering of the sortkeys of the two records. The programmer is responsible for any preprocessing of the records to generate the sortkeys and any postprocessing to retrieve the sorted data. The generic sort function only manages the comparisons and shuffles the operands into sorted order.
+
As Perl´s sort function is O(N*logN), efficiency must come from extracting and comparing the sortkeys using the least amount of work. Much of this paper will be about methods to make sortkey extraction and comparison as efficient as possible.
+
External sorting
+
Every popular commercial operating system offers a sort utility. Unix/POSIX flavors typically have a sort command which is fast and fairly flexible with regard to sortkey extraction from text files. In some cases, the Unix/POSIX sort command may be easier to code and more efficient than using the Perl sort function.
+
Several vendors sell highly optimized commercial sort packages that have received decades of attention and can handle massive amounts of data. But they are very expensive and not suitable for use inside a Perl program.
+
All of these are capable of dealing efficiently with very large amounts of data, using external media such as disk or tape files for intermediate storage when needed. In contrast, the Perl sort function requires that the entire list of operands be in (real or -- much more expensively -- virtual) memory at the same time. So Perl is not the appropriate tool to use for huge sorts (where huge is defined by your system´s memory limits), which we shall not consider further.
+
Internal sorting
+
The Perl sort function uses an implementation of the quicksort algorithm that is similar to (but more robust than) the qsort function in the ANSI/ISO Standard C Library [6]. In the simplest use, the Perl sort function requires no sortsub:
+
@out = sort @in;
+
This default sorts the data in ascending lexicographic order, using the fast C memcmp function as the comparison operation. If a locale is specified, it substitutes the more complicated and somewhat slower C strcoll function.
+
If you want any kind of ordering other than this, you must provide a custom comparison sortsub. The sortsub can be specified either as a code block, the name of a subroutine, or a typeglob that refers to a subroutine (a coderef). In Perl 5.6, a scalar variable that contains a coderef can also be used to specify the sortsub.
+
In order to optimize the calling of the sortsub, Perl bypasses the usual passing of arguments via @_, using instead a more efficient special-purpose method. Within the sortsub, the special global package variables $a and $b are aliases for the two operands being compared. The sortsub must return a number less than 0, equal to 0, or greater than 0, depending on the result of comparing the sortkeys of $a and $b. The special variables $a and $b should never be used to change the values of any input data, as this may break the sort algorithm.
+
Even the simplest custom sort in Perl will be less efficient than using the default comparison. The default sort runs entirely in C code in the perl core, but any sortsub must execute Perl code. A well-known optimization is to minimize the amount of Perl code executing and to try to stay inside the perl core as much as possible. Later we will see various optimization techniques that will reduce the amount of Perl code executed.
+
The primary goal of this paper is to perform all sorts using the default comparison. Here is how an explicit ascending lexicographic would be done using a sortsub:
+
@out = sort { $a cmp $b } @in;
+
For a simple measurement, compare Default and Explicit in Benchmark A1 of Appendix A. The default method is about twice as fast as the explicit method.
+
Trivial sorts
+
We call trivial sorts those that use the entire record as the sortkey and do only a minimal amount of processing of the record. To do trivial Perl sorts other than ascending lexicographic, you just need to create an appropriate sortsub. Here are some common ones that perform useful functions.
+
The simplest such example is the ascending numeric sort, which uses the picturesquely monikered `spaceship´ operator:
+
@out = sort { $a <=> $b } @in;
+
A numeric sort capability is required because the lexicographic order of, say, (1, 2, 10) does not correspond to the numeric order.
+
If you want the sort to be in descending order there are three techniques you can use. The worst is to negate the result of the comparison in the sortsub. Better is to reverse the order of the comparison by swapping $a and $b. This has the same speed as the corresponding forward sort.
The best method is to apply the reverse function to the result of a default ascending lexicographic sort.
+
@out = reverse sort @in;
+
Note that this is faster than using the explicit descending lexicographic sort, for the reason discussed above: the default sort is faster than using a sortsub. The reverse function is efficient because it just moves pointers around.
+
Another common problem is sorting with case insensitivity. This is easily solved using the lc or uc function. Either one will give the same results.
+
@out = sort { lc $a cmp lc $b } @in;
+
Benchmark A1 analyzes these examples as a function of the input size. The O(N*logN) behavior is apparent, as well as the cost of using even a simple built-in function like lc in the sortsub.
+
Fielded and record sorts
+
The above trivial sorts sort the input list using as the sortkey the entire string (for a lexicographic sort) or the first number in each datum (for a numeric sort). More typically, the sortkey is based on some property that is a function of all or part of each datum. Several individual subkeys may be combined into a single sortkey or may be compared in pairs individually.
+
A complex string may be divided into fields, some of which may serve as subkeys. For example, the Unix/POSIX sort command provides built-in support for collation based on one or more fields of the input; the Perl sort function does not, and the programmer must provide it. One CPAN module focuses on fielded sorts [7].
+
If your data are records which are complex strings or references to arrays or hashes, you have to perform comparisons on selected parts of the records. This is called record sorting. (Fielded sorts are a subset of record sorts.)
+
In the code examples that follow, KEY() is meant to be substituted with some Perl code that performs sortkey extraction. It is best that it not be an actual subroutine call, because subroutine calls within sortsubs can be expensive. Calls to built-in Perl functions (such as the calls to lc in the example above) are like Perl operators, thus relatively less expensive.
+
When sorting string records, $a and $b are set to those strings, so to extract the sortkeys you generally perform various string operations on the records. Functions commonly used for this include split, substr, unpack, and m//. Here is one example, sorting a list of password-file lines by user name using split. The fields are separated by colons, and the user name is the first field.
In some cases you need to sort records by a primary subkey, then for all the records with the same primary subkey value, you need to sort by a secondary subkey. One horribly inefficient way to do this is to sort first by the primary subkey, then get all the records with a given subkey and sort them by the secondary subkey. The standard method is to do a multi-key sort. This entails extracting a subkey for each field, and comparing paired subkeys in priority order. So if two records with the same primary subkey are compared, they will actually be compared based on the secondary subkey. Sorting on more than two subkeys is done by extending the logic.
+
Perl has a very nice feature which makes multi-key sorts easy to write. The || (short-circuit or) operator returns the actual value of the first logically true operand it sees. So if you use || to concatenate a set of key comparisons, the first comparison is the primary subkey. If a pair of primary subkeys compare equal, the sortsub´s return value will be the result of the secondary subkey comparison.
+
An example will illustrate this `ladder´ of comparisons better than more text. Here is a three-subkey sort:
+
@out = sort {
+ # primary subkeys comparison
+ KEY1($a) cmp KEY1($b)
+ ||
+ # or if they are equal
+ # return secondary comparison
+ # descending numeric comparison
+ KEY2($b) <=> KEY2($a)
+ ||
+ # or if they are equal
+ # return tertiary comparison
+ # lexicographic comparison
+ KEY3($a) cmp KEY3($b)
+} @in;
+
Naive multi-subkey record sorts
+
In the two previous examples, we showed a sort with relatively expensive sortkey extraction (via split), and a multi-subkey sort. Let´s combine them. For concreteness, we shall deal with a problem that has received much attention in comp.lang.perl.misc -- sorting a list of IP addresses in `dotted-quad´ form. Each element of the list is a string of the form "nnn.nnn.nnn.nnn\tabc.xyz.com\n", where nnn represents a decimal integer between 0 and 255, with or without leading zero-padding.
+
In the most naive approach, we sort on each of these four numeric fields as individual subkeys, in succession.
Even for small lists this is very slow, because of the many Perl operations executed in the sortsub for every one of the O(N*logN) comparisons.
+
Computing a single packed-string sortkey
+
To improve performance, we will derive from these four subkeys a single packed-string sortkey for each IP address, which we can then use to sort the array monotonically increasing.
+
The following expression produces the shortest key, a string of four bytes, with the least Perl calculation:
Benchmark A2 shows that comparing the subkeys in pairs is less efficient than packing them and comparing the packed strings. This observation applies to all sorting methods. In further benchmarks of advanced sorts for this problem, we will always used packed sortkeys.
+
Nevertheless, naive sorting is still woefully inefficient, because both sortkeys are recomputed every time one input operand is compared against another. What we need now is a way to compute each sortkey once only and to remember the result.
+
Advanced sorts
+
As all sorts in Perl use the builtin sort function and therefore the same quicksort algorithm, all Perl sorts are of order O(N*logN). We can´t improve upon that, so we have to address other issues to gain efficiency. As the complexity is fixed, tackling the constant factors can be fruitful and, in the real world, can produce significant improvements in efficiency. When a sortsub needs to generate a complex sortkey, that is normally done O(N*LogN) times, but there are only N records, hence N sortkeys. What if we were to extract the sortkey only once per record, and keep track of which sortkey belonged to which record?
+
Caching the sortkeys
+
The obvious way to associate sortkeys with the records from which they were created is to use a hash. The hash can be created in a preprocessing pass over the data. If the approximate size of the data set is known, preallocating the hash improves performance.
+
keys my %cache = @in;
+$cache{$_} = KEY($_) for @in;
+
The following sets up the cache more efficiently, using a hash slice:
In essence, we have replaced lengthy computations in the sortsub by speedy (O(1)) hash lookups.
+
If you want to do a complex multi-key comparison, you either have to use a separate cache for each subkey or combine subkeys in a similar way to the packed-sort optimizations we will describe later. Here is an example of the former:
An important point about cached sorts is that no postprocessing is needed to retrieve the sorted records. The method sorts the actual records, but uses the cache to reduce the sortkey extraction to O(N).
+
The Orcish Maneuver (OM)
+
The Orcish Maneuver (invented by Joseph N. Hall [8]) eliminates the preprocessing pass over the data, which might save keeping a copy of the data if they are being read directly from a file. It does the sortkey extraction only once per record, as it checks the hash to see if it was done before. The test and storage of the sortkey is done with the ||= operator (short-circuit or-assignment), which will evaluate and assign the expression on the right to the lvalue on the left, if the lvalue is false. The name `orcish´ is a pun on `or-cache´. The full statement in the sortsub looks like this:
That sees if the sortkey for $a is cached, and if not, extracts it and caches it. The sortkey for $a is then compared to the sortkey for $b (which is found in the same way).
+
Here is an example of a two-subkey comparison using two caches:
The OM has some minor efficiency flaws. An extra test is necessary after each sortkey is retrieved from the or-cache. Furthermore, if an extracted sortkey has a false value, it will be recomputed every time. This usually works out all right, because the extracted sortkeys are seldom false. However, except when the need to avoid reading the data twice is critical, the explicit cached sort is always slightly faster than the OM. (See Benchmark A3.)
+
The Schwartzian Transform (ST)
+
A more efficient approach to caching sortkeys, without using named temporary variables, was popularized by Randal L. Schwartz, and dubbed the Schwartzian Transform [9, 10]. (It should really be called the Schwartz Transform, after the model of the Fourier and Laplace Transforms, but it is too late to fix the name now.)
+
The significant invention in the ST is the use of anonymous arrays to store the records and their sortkeys. The sortkeys are extracted once, during a preprocessing pass over all the data in the list to be sorted (just as we did before in computing the cache of sortkeys).
The ST doesn´t sort the actual input data. It sorts the references to anonymous arrays that contain the original records and the sortkeys. So we have to postprocess to retrieve the sorted records from the anonymous arrays.
+
Using the ST for a multi-subkey sort is straightforward. Just store each successive extracted subkey in the next entry in the anonymous array. In the sortsub, do an or between comparisons of successive subkeys, as with the OM and the naive sorts.
For a very illuminating deconstruction and reconstruction of the ST, see [11].
+
The packed-default sort
+
Each of the advanced sorting techniques described above saves the operands to be sorted together with their sortkeys. (In the cached sorts, the operands are the keys of a hash and the sortkeys are the values of the hash; in the Schwartzian Transform, the operands are the first elements of anonymous arrays, the sortkeys are the other elements of the arrays.) We now extend that idea to saving the operands to be sorted together with packed-string sortkeys, using concatenation.
+
This little-known optimization improves on the ST by eliminating the sortsub itself, relying on the default lexicographic sort, which as we showed earlier is very efficient. This is the method used in the new Sort::Maker module.
+
To accomplish this goal, we modify the ST by replacing its anonymous arrays by packed strings. First we pack into a single string each subkey followed last by the operand to be sorted. Then we sort lexicographically on those strings, and finally we retrieve the operands from the end of the strings.
Several methods can be used, singly or in combination, to construct the packed strings, including concatenation, pack, or sprintf. Several methods can be used to retrieve the operands, including substr (shown here), which is likely to be the fastest, split, unpack or a regex.
+
Multiple subkeys are simply concatenated, suitably delimited if necessary. Techniques for computing subkeys of various types are presented in Appendix B.
+
Benchmarks of the packed-default sort
+
Benchmark A4 compares the two most advanced general-purpose sorting techniques, ST and packed-default. These multi-stage sorts are measured both as individual stages with saved intermediate data and as single statements.
+
The packed-default sort is about twice as fast as the ST, which is the fastest familiar Perl sorting algorithm.
+
Earlier, we showed a trivial sort using the lc function. Even for that case, the packed-default sort provides better performance when more than a few data items are being sorted. See Benchmark A5, which shows quasi-O(N) behavior for the packed-default sort (because the sorting time is small relative to the sortkey extraction).
+
Sorting a list of arrays or hashes
+
Consider the common problem of sorting a two-dimensional data structure, a list of references to arrays or to hashes, where the sortkeys are functions of the values of the submembers.
+
If we were to use the packed-default method, the references would be converted to strings and appended to the sortkeys. After sorting, the operands could be retrieved as strings, but would no longer be usable as references. Instead, we must use the indexes of the list members as the operands to be sorted.
+
The following benchmark compares a packed-sortkey ST sort with an indexed sort that uses the packed-default approach. The list being sorted comprises references to arrays, each of which has two elements: an IP address (which serves as the primary sortkey), and a domain name (which serves as the secondary sortkey). These are the same data as used in the above benchmarks, split into two array elements.
The indexed sort is faster than the ST once again. (See Benchmark A6.)
+
Indexed sorts and stable sorts
+
In the indexed sort, the auto-incrementing index $i ensures that no array records will have identical packed sortkeys. It also ensures that the sort will be stable.
+
Any Perl sort can be stabilized by using such an index as the final tie-breaking subkey. For an indexed sort, the index is actually the operand being sorted. This fact offers another possible performance advantage for the indexed sort. The actual records to be sorted (which may be long strings) need not be appended to the sortkeys, which would create a second copy of each record. Using the indexed sort, the records may be recovered after the sort from the original data, using the sorted indexes.
+
The Sort::Maker module
+
Sort::Maker is on CPAN and implements the GRT for
+all types of Perl values.
+
+
Conclusions
+
Packing of subkeys into strings that can be compared lexicographically improves the performance of all sorting techniques, relative to the usual method of comparing the individual subkeys in pairs.
+
Packing the operands with the sortkeys allows the sort to be done using the default ascending lexicographic comparison (without a sortsub). This yields a markedly faster sort than the Orcish Maneuver or the Schwartzian Transform. The sorting process may approximate O(N) behavior, because the O(N*logN) time for the sort itself is small compared to the time required to extract the sortkeys.
+
The packed-sortkey sort may be written explicitly, or the new Sort::Maker module may be used.
+
Acknowledgments
+
This idea was brought to our attention by Michal Rutka [12]. John Porter participated in initiating this project and reviewed a draft of the paper.
+
References
+
+
1. The sort function man page, http://www.perl.com/CPAN/doc/manual/html/pod/perlfunc/sort.html
+
2. Kernighan, B. W. & Pike, R., (1999). The Practice of Programming, p. 41. Addison-Wesley.
+
3. Pike, R. (1989). Notes on Programming in C, http://wwwwbs.cs.tu-berlin.de/~jutta/c/pikestyle.html
+
4. Knuth, D. E. (1998). The Art of Computer Programming : Sorting and Searching (Vol 3, 2nd ed), chap. 5. Addison-Wesley.
+
5. Sedgewick, R. (1983). Algorithms, chap. 10. Addison-Wesley.
+
6. ANSI/ISO 9899-1992, sect. 4.10.5.2. American National Standards Institute.
8. Hall, J. N. (1998). Effective Perl Programming, p. 48. Addison-Wesley.
+
9. How do I sort an array by (anything)?, http://www.perl.com/CPAN/doc/manual/html/pod/perlfaq4.html#How_do_I_sort_an_array_by_anyth
+
10. Christiansen, T. & Torkington, N. (1998). The Perl Cookbook, Recipe 4.15: "Sorting a List by Computable Field". O´Reilly.
+
11. Christiansen, T., Far More Than Everything You´ve Ever Wanted to Know About Sorting, http://www.perl.com/CPAN/doc/FMTEYEWTK/sort.html
+
12. Rutka, M., in comp.lang.perl.misc. http://x4.dejanews.com/[ST_rn=ps]/getdoc.xp?AN=397853353)
+
+
Appendix A: Benchmarks
+
A caveat: Useful benchmarking depends on judicious isolation of relevant variables, both in the algorithms being benchmarked and in the data sets used. Different implementations may give different relative results even with the same algorithms and data. Thus all such results should be verified under your own conditions. In short, your mileage may vary.
+
In the following benchmarks, all data represent the time (in microseconds) per line in the input data, which averages 35 characters per line. All named arrays and hashes are preallocated, which reduces the variance in the measurements due to storage allocation.
To create and combine the subkeys and the operand to be sorted, any combination of concatenation, interpolation, pack, or sprintf may be used, the latter two with simple or compound formats.
+
Fixed-length strings (ascending):
+
+
simple interpolation
+
pack('... An ...', ...)
+sprintf('... %s ...', ...)
+
+
Fixed-length strings (descending):
+
+
Bit-complement the string first.
+
$subkey = $string ^
+ "\xFF" x length $string
+
Then handle as an ascending fixed-length string.
+
+
Null bytes ("\0") are used to terminate string subkeys of varying length, as that ensures lexicographic ordering. If a string subkey may contain a null byte, then it must be of fixed length. If any of the operands to be sorted may contain null bytes, then every subkey must have fixed length.
+
Varying-length strings (ascending):
+
Terminate the string with a null byte, to separate it from succeeding subkeys or the operand.
+
+
interpolation:
"$string\0"
+
pack('... A* x ...', ...)
+
sprintf('... %s\0 ...', ...)
+
+
Varying-length strings (descending):
+
Make a prepass over the data to find the length of the longest string.
+
+
my $len = 0;
+$len < length and $len = length
+ for map KEY($_) => @data;
+
+
Then null-pad each string to that length and proceed as above for fixed-length strings (descending).
Signed two´s-complement 32-bit integers (ascending):
+
Bias to unsigned by xoring the sign bit, then treat as unsigned.
+
+
$subkey = pack('N',
+ $number ^ (1 << 31));
+
+
Floating-point numbers (ascending):
+
This code assumes that floating-point numbers are represented in binary using IEEE format. Create a subroutine that packs a double in network order (big-endian).
If any of the subkeys has varying length, make sure that the last character in the complete packed sortkey is a null byte, then search for it from the right:
+
diff --git a/slides/slides/slide-0101.html b/slides/slides/slide-0101.html
new file mode 100644
index 0000000..db2e16b
--- /dev/null
+++ b/slides/slides/slide-0101.html
@@ -0,0 +1,59 @@
+
+
+
+ A Little Past History
+
+
+
\ No newline at end of file
diff --git a/slides/slides/slide-0102.html b/slides/slides/slide-0102.html
new file mode 100644
index 0000000..84d32ca
--- /dev/null
+++ b/slides/slides/slide-0102.html
@@ -0,0 +1,50 @@
+
+
+
+ Recent History
+
+
+
\ No newline at end of file
diff --git a/slides/slides/slide-0112.html b/slides/slides/slide-0112.html
new file mode 100644
index 0000000..3bb3916
--- /dev/null
+++ b/slides/slides/slide-0112.html
@@ -0,0 +1,83 @@
+
+
+
+ Tests and Benchmarks
+
+
+
\ No newline at end of file
diff --git a/slides/slides/toc.html b/slides/slides/toc.html
new file mode 100644
index 0000000..f65a70e
--- /dev/null
+++ b/slides/slides/toc.html
@@ -0,0 +1,13 @@
+
+
+
+Table of Contents for Sort::Maker
+
+
+
+
diff --git a/slides/sort_maker.txt b/slides/sort_maker.txt
new file mode 100644
index 0000000..1d11f2b
--- /dev/null
+++ b/slides/sort_maker.txt
@@ -0,0 +1,271 @@
+
+book: Sort::Maker
+chapter: Sort::Maker
+
+title: A Little Past History
+
+*: TPC3 in Monterey, CA 1999
+*: Co-wrote and presented "A Fresh Look at Efficient Perl Sorting"
+html:
+ http://www.sysarch.com/perl/sort_paper.html
+*: Co-author was Larry Rosler (retired from HP and Perl)
+*: Introduced the GRT as a way to speed up Perl sorts
+*: It won the prize for Best Technical Paper
+*: A module Sort::Records was promised which would implement the GRT
+*: Sort::Records was started but a code and design review showed many
+ problems.
+*: Sort::Records was shelved during development because of a code review
+*: People still write to me asking about that module
+
+PAGE_END
+
+title: Recent History
+
+*: I have been thinking about this module for 5 years
+*: Solved the major design flaw - have the user provide key extraction
+ code
+*: A thread in perl6-language gave needed inspiration
+*: Sort::Maker emerges
+*: Much help in the API design and some coding tricks from Damian Conway
+PAGE_END
+
+title: Introducing Sort::Maker
+
+*: Easy description of sorts
+*: Generates a sort sub
+*: Choice of generated sort styles
+*: Can generate the highest speed sorts in Perl
+*: Generated source can be printed
+**: For study
+**: For pasting in code to save the generation time overhead
+*: These slides can be found at
+
+html:
+ http://www.stemsystems.com/slides/slides/index.html
+*: The current tarball is at
+
+html:
+ http://www.stemsystems.com/sort/Sort-Maker-0.01.tar.gz
+
+PAGE_END
+
+title: Sort::Maker Synopsis
+
+*: Exports a single sub 'make_sorter'
+*: Each key has its own description
+*: Call make_sorter with sort description arguments and it returns a
+ code reference.
+*: Call that reference with unsorted input records
+*: Returns sorted records
+
+code:
+
+ my $sorter = make_sorter(
+ qw( plain ),
+ number => [
+ code => '/(\d+)/',
+ 'descending',
+ ],
+ ) ;
+
+ my @sorted = $sorter->( @unsorted ) ;
+
+PAGE_END
+
+title: Key Descriptions
+
+*: Each key has a description
+*: Keys must be in the order they will sort (higher level keys must be
+ earlier)
+*: The key type must be specified as 'string' or 'number'
+*: Keys can have attributes such as 'descending' or 'case'
+*: Attributes can have defaults set for all the keys
+*: Default attributes can be overridden in any key description
+
+PAGE_END
+
+title: Extraction Code
+
+*: Each key needs code to extract it from a record
+*: Each record is aliased to $_ (via map)
+*: Key extraction code operates on $_ and gets the value for this key
+*: The extraction code is executed in list context so m/(foo)/ works
+*: The code is inside a do{} block so you can have multiple statements
+
+PAGE_END
+
+title: Sort Styles
+
+*: Four different sorting styles to choose from
+**: Plain
+**: Orcish Manouvre
+**: Schwartian Transform (ST)
+**: Guttman-Rosler Transform (GRT)
+*: Each has its uses and advantages
+*: Styles are really different ways to cache extracted keys
+*: Caching keys moves key extraction from O( N log N ) to O( N )
+*: In larger sizes of sort sets, caching keys is a very big win
+*: This is a classic sort of arrays of numbers
+**: Compare this code to the generated code for the different sort styles
+code:
+
+ sort { $a->[0] cmp $b->[0] ||
+ $a->[1] cmp $b->[1]
+ }
+
+PAGE_END
+
+title: Plain Sort
+
+*: No key caching
+*: Similar to common sorts with a code block
+*: Good for small sort sets as there is no caching
+*: Pass 'plain' option to make_sorter
+
+code:
+
+ sub {
+
+ sort {
+ do{ my( $left, $right ) = map { $_->[0] } $a, $b;
+ $left cmp $right }
+ ||
+ do{ my( $left, $right ) = map { $_->[1] } $a, $b;
+ $left cmp $right }
+
+ } @_ ;
+ }
+
+PAGE_END
+
+title: Orcish Manouvre Sort
+
+*: Caches extracted keys in a hash
+*: Assignments to the hash in ||=
+*: Called the orcish because of Or-Cache
+*: Created by Joseph Hall
+*: It will re-extract keys for any record with a false value
+*: The caching hash must be cleared beforehand
+**: Sort::Maker declares a caching hash in the anonymous sub
+*: Hash lookups are still O( N log N )
+*: Good for medium sort sets
+*: Pass 'orcish' option to make_sorter
+
+code:
+ sub {
+ my %or_cache ;
+
+
+ sort {
+ (
+ ( $or_cache{$a} ||=
+ do{ my ($val) = map { $_->[0] } $a ; $val } )
+ cmp
+ ( $or_cache{$b} ||=
+ do{ my ($val) = map { $_->[0] } $b ; $val } )
+ )
+ ||
+ (
+ ( $or_cache{$a} ||=
+ do{ my ($val) = map { $_->[1] } $a ; $val } )
+ cmp
+ ( $or_cache{$b} ||=
+ do{ my ($val) = map { $_->[1] } $b ; $val } )
+ )
+
+ } @_ ;
+ }
+
+PAGE_END
+
+title: Schwartian Transform (ST) Sort
+
+*: Caches extracted keys in an anonymous array for each input record
+*: Stores the record itself in slot 0 of the array
+*: Uses the map/sort/map idiom
+*: Popularized by Randal Schwartz
+*: Good for medium to large sort sets
+*: Key extraction is O( N )
+*: Does a full callback to Perl in the comparison block
+*: Pass 'ST' option to make_sorter
+
+code:
+sub {
+ map $_->[0],
+ sort {
+ $a->[1] cmp $b->[1]
+ ||
+ $a->[2] cmp $b->[2]
+
+ }
+ map [ $_,
+ do{ my ($val) = $_->[0] ; $val },
+ do{ my ($val) = $_->[1] ; $val }
+ ], @_ ;
+}
+
+PAGE_END
+
+title: Guttman-Rosler Transform (GRT) Sort
+
+*: Caches extracted keys in a single string for each input record
+*: String records can be stored in that string
+*: Reference records have their index stored in the cache string
+**: The sorted indices are used to slice into the input array to create
+ sorted records
+*: Key extraction is O( N )
+*: The comparison code is internal and uses C with no Perl callback (big
+ win)
+*: More complex to use properly and efficiently
+*: GRT has special key description attributes for optimization
+**: Numbers can be integer/float, signed/unsigned
+**: Strings can be fixed/varying
+**: Fastest sort style for larger sets of sort records
+*: Pass 'GRT' option to make_sorter
+code:
+sub {
+ my $rec_ind = 0 ;
+
+ return @_[
+ map unpack( 'N', substr( $_, -4 ) ),
+ sort
+ map pack( "Z*Z*N",
+ do{ my( $val ) = $_->[0] ; ( $val ) },
+ do{ my( $val ) = $_->[1] ; ( $val ) },
+ $rec_ind++
+ ), @_
+ ] ;
+}
+
+PAGE_END
+
+title: Tests and Benchmarks
+
+*: Test system is table driven
+*: Scripts can do both tests and benchmarks
+*: Provide a set of data
+**: Manually
+**: Generate in place with map
+**: Generate with a anonymous sub
+*: Provide a hand written 'golden' sort sub
+*: Provide the arguments to make_sorter()
+*: Many tests are in but more are wanted
+**: Send in tests if you want
+*: More complex tests are needed
+code:
+
+ {
+ skip => 0,
+ name => 'arrays of multiple strings',
+ source => 1,
+ data => [ map {
+ [ rand_token( 8, 20 ), rand_token( 8, 20 ), ]
+ } 1 .. 100
+ ],
+ gold => sub { $a->[0] cmp $b->[0] ||
+ $a->[1] cmp $b->[1] },
+ args => [ qw( string $_->[0] string $_->[1] ) ],
+ },
+
+PAGE_END
+
diff --git a/slides/templates/bullet.tmpl b/slides/templates/bullet.tmpl
new file mode 100644
index 0000000..ebd7300
--- /dev/null
+++ b/slides/templates/bullet.tmpl
@@ -0,0 +1 @@
+
<%TEXT%>
diff --git a/slides/templates/bullet2.tmpl b/slides/templates/bullet2.tmpl
new file mode 100644
index 0000000..ebd7300
--- /dev/null
+++ b/slides/templates/bullet2.tmpl
@@ -0,0 +1 @@
+
<%TEXT%>
diff --git a/slides/templates/code.tmpl b/slides/templates/code.tmpl
new file mode 100644
index 0000000..484c492
--- /dev/null
+++ b/slides/templates/code.tmpl
@@ -0,0 +1 @@
+
<%TEXT%>
\ No newline at end of file
diff --git a/slides/templates/footer.tmpl b/slides/templates/footer.tmpl
new file mode 100644
index 0000000..214b3d5
--- /dev/null
+++ b/slides/templates/footer.tmpl
@@ -0,0 +1,19 @@
+
+