Awk ignores leading whitespace on split. Perl by default does not.
The a2p translator couldn't handle this. The fix is partly to a2p
and partly to perl. Perl now has a way to tell split to ignore
leading white space, as awk does, and a2p now takes advantage of
that.
I also threw in an optimization that lets runtime patterns
compile just once if they are known to be constant, so that
split(' ') doesn't compile the pattern every time.
-/* $Header: arg.c,v 1.0.1.5 88/01/30 08:53:16 root Exp $
+/* $Header: arg.c,v 1.0.1.6 88/02/01 17:32:26 root Exp $
*
* $Log: arg.c,v $
+ * Revision 1.0.1.6 88/02/01 17:32:26 root
+ * patch12: made split(' ') behave like awk in ignoring leading white space.
+ *
* Revision 1.0.1.5 88/01/30 08:53:16 root
* patch9: fixed some missing right parens introduced (?) by patch 2
*
char *d;
m = str_get(eval(spat->spat_runtime,Null(STR***)));
+ if (!*m || (*m == ' ' && !m[1])) {
+ m = "[ \\t\\n]+";
+ while (isspace(*s)) s++;
+ }
+ if (spat->spat_runtime->arg_type == O_ITEM &&
+ spat->spat_runtime[1].arg_type == A_SINGLE) {
+ arg_free(spat->spat_runtime); /* it won't change, so */
+ spat->spat_runtime = Nullarg; /* no point compiling again */
+ }
if (d = compile(&spat->spat_compex,m,TRUE,FALSE)) {
#ifdef DEBUGGING
deb("/%s/: %s\n", m, d);
-#define PATCHLEVEL 11
+#define PATCHLEVEL 12
''' Beginning of part 2
-''' $Header: perl.man.2,v 1.0.1.2 88/01/30 17:04:28 root Exp $
+''' $Header: perl.man.2,v 1.0.1.3 88/02/01 17:33:03 root Exp $
'''
''' $Log: perl.man.2,v $
+''' Revision 1.0.1.3 88/02/01 17:33:03 root
+''' patch12: documented split more adequately.
+'''
''' Revision 1.0.1.2 88/01/30 17:04:28 root
''' patch 11: random cleanup
'''
(Note that the delimiter may be longer than one character.)
Trailing null fields are stripped, which potential users of pop() would
do well to remember.
-A pattern matching the null string will split the value of EXPR into separate
-characters.
+A pattern matching the null string (not to be confused with a null pattern)
+will split the value of EXPR into separate characters at each point it
+matches that way.
+For example:
+.nf
+
+ print join(':',split(/ */,'hi there'));
+
+.fi
+produces the output 'h:i:t:h:e:r:e'.
+
+The pattern /PATTERN/ may be replaced with an expression to specify patterns
+that vary at runtime.
+As a special case, specifying a space ('\ ') will split on white space
+just as split with no arguments does, but leading white space does NOT
+produce a null first field.
+Thus, split('\ ') can be used to emulate awk's default behavior, whereas
+split(/\ /) will give you as many null initial fields as there are
+leading spaces.
.sp
Example:
.nf
-/* $Header: a2p.h,v 1.0.1.1 88/01/26 09:52:30 root Exp $
+/* $Header: a2p.h,v 1.0.1.2 88/02/01 17:33:40 root Exp $
*
* $Log: a2p.h,v $
+ * Revision 1.0.1.2 88/02/01 17:33:40 root
+ * patch12: forgot to fix #define YYDEBUG; bug in a2p.
+ *
* Revision 1.0.1.1 88/01/26 09:52:30 root
* patch 5: a2p didn't use config.h.
*
#ifdef DEBUGGING
EXT int debug INIT(0);
EXT int dlevel INIT(0);
-#define YYDEBUG;
+#define YYDEBUG 1
extern int yydebug;
#endif
-/* $Header: walk.c,v 1.0.1.1 88/01/28 11:07:56 root Exp $
+/* $Header: walk.c,v 1.0.1.2 88/02/01 17:34:05 root Exp $
*
* $Log: walk.c,v $
+ * Revision 1.0.1.2 88/02/01 17:34:05 root
+ * patch12: made a2p take advantage of new awk-compatible split in perl.
+ *
* Revision 1.0.1.1 88/01/28 11:07:56 root
* patch8: changed some misleading comments.
*
str_cat(str,"';\t\t# field separator from -F switch\n");
}
else if (saw_FS && !const_FS) {
- str_cat(str,"$FS = '[ \\t\\n]+';\t\t# set field separator\n");
+ str_cat(str,"$FS = ' ';\t\t# set field separator\n");
}
if (saw_OFS) {
str_cat(str,"$, = ' ';\t\t# set output field separator\n");
str_scat(str,fstr=walk(1,level,ops[node+3].ival,&numarg));
str_free(fstr);
numeric |= numarg;
- if (strEQ(str->str_ptr,"$FS = '\240'"))
- str_set(str,"$FS = '[\240\\n\\t]+'");
break;
case OADD:
str = walk(1,level,ops[node+1].ival,&numarg);
else if (saw_FS)
str_cat(str,"$FS");
else
- str_cat(str,"/[ \\t\\n]+/");
+ str_cat(str,"' '");
str_cat(str,", ");
str_scat(str,fstr=walk(1,level,ops[node+1].ival,&numarg));
str_free(fstr);
else if (saw_FS)
str_cat(str," = split($FS);\n");
else
- str_cat(str," = split;\n");
+ str_cat(str," = split(' ');\n");
tab(str,level);
}