From: Nick Ing-Simmons Date: Sun, 28 Jan 2001 19:22:40 +0000 (+0000) Subject: PerlIO documentation. Proofing and sub-editing requested. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=50b80e251bdb7e866faeb20ccfeb18b474ffc6cb;p=p5sagit%2Fp5-mst-13.2.git PerlIO documentation. Proofing and sub-editing requested. p4raw-id: //depot/perlio@8581 --- diff --git a/pod/perlapio.pod b/pod/perlapio.pod index 90475a9..88a509c 100644 --- a/pod/perlapio.pod +++ b/pod/perlapio.pod @@ -4,71 +4,121 @@ perlapio - perl's IO abstraction interface. =head1 SYNOPSIS + #define PERLIO_NOT_STDIO 0 /* For co-existance with stdio only */ + #include /* Usually via #include */ + PerlIO *PerlIO_stdin(void); PerlIO *PerlIO_stdout(void); PerlIO *PerlIO_stderr(void); - PerlIO *PerlIO_open(const char *,const char *); - int PerlIO_close(PerlIO *); - - int PerlIO_stdoutf(const char *,...) - int PerlIO_puts(PerlIO *,const char *); - int PerlIO_putc(PerlIO *,int); - int PerlIO_write(PerlIO *,const void *,size_t); - int PerlIO_printf(PerlIO *, const char *,...); - int PerlIO_vprintf(PerlIO *, const char *, va_list); - int PerlIO_flush(PerlIO *); - - int PerlIO_eof(PerlIO *); - int PerlIO_error(PerlIO *); - void PerlIO_clearerr(PerlIO *); - - int PerlIO_getc(PerlIO *); - int PerlIO_ungetc(PerlIO *,int); - int PerlIO_read(PerlIO *,void *,size_t); - - int PerlIO_fileno(PerlIO *); - PerlIO *PerlIO_fdopen(int, const char *); - PerlIO *PerlIO_importFILE(FILE *, int flags); - FILE *PerlIO_exportFILE(PerlIO *, int flags); - FILE *PerlIO_findFILE(PerlIO *); - void PerlIO_releaseFILE(PerlIO *,FILE *); - - void PerlIO_setlinebuf(PerlIO *); - - long PerlIO_tell(PerlIO *); - int PerlIO_seek(PerlIO *,off_t,int); - int PerlIO_getpos(PerlIO *,Fpos_t *) - int PerlIO_setpos(PerlIO *,Fpos_t *) - void PerlIO_rewind(PerlIO *); - - int PerlIO_has_base(PerlIO *); - int PerlIO_has_cntptr(PerlIO *); - int PerlIO_fast_gets(PerlIO *); - int PerlIO_canset_cnt(PerlIO *); - - char *PerlIO_get_ptr(PerlIO 
*); - int PerlIO_get_cnt(PerlIO *); - void PerlIO_set_cnt(PerlIO *,int); - void PerlIO_set_ptrcnt(PerlIO *,char *,int); - char *PerlIO_get_base(PerlIO *); - int PerlIO_get_bufsiz(PerlIO *); + PerlIO *PerlIO_open(const char *path,const char *mode); + PerlIO *PerlIO_fdopen(int fd, const char *mode); + PerlIO *PerlIO_reopen(const char *path, const char *mode, PerlIO *old); /* deprecated */ + int PerlIO_close(PerlIO *f); + + int PerlIO_stdoutf(const char *fmt,...) + int PerlIO_puts(PerlIO *f,const char *string); + int PerlIO_putc(PerlIO *f,int ch); + int PerlIO_write(PerlIO *f,const void *buf,size_t numbytes); + int PerlIO_printf(PerlIO *f, const char *fmt,...); + int PerlIO_vprintf(PerlIO *f, const char *fmt, va_list args); + int PerlIO_flush(PerlIO *f); + + int PerlIO_eof(PerlIO *f); + int PerlIO_error(PerlIO *f); + void PerlIO_clearerr(PerlIO *f); + + int PerlIO_getc(PerlIO *d); + int PerlIO_ungetc(PerlIO *f,int ch); + int PerlIO_read(PerlIO *f, void *buf, size_t numbytes); + + int PerlIO_fileno(PerlIO *f); + + void PerlIO_setlinebuf(PerlIO *f); + + Off_t PerlIO_tell(PerlIO *f); + int PerlIO_seek(PerlIO *f, Off_t offset, int whence); + void PerlIO_rewind(PerlIO *f); + + int PerlIO_getpos(PerlIO *f, SV *save); /* prototype changed */ + int PerlIO_setpos(PerlIO *f, SV *saved); /* prototype changed */ + + int PerlIO_fast_gets(PerlIO *f); + int PerlIO_has_cntptr(PerlIO *f); + int PerlIO_get_cnt(PerlIO *f); + char *PerlIO_get_ptr(PerlIO *f); + void PerlIO_set_ptrcnt(PerlIO *f, char *ptr, int count); + + int PerlIO_canset_cnt(PerlIO *f); /* deprecated */ + void PerlIO_set_cnt(PerlIO *f, int count); /* deprecated */ + + int PerlIO_has_base(PerlIO *f); + char *PerlIO_get_base(PerlIO *f); + int PerlIO_get_bufsiz(PerlIO *f); + + PerlIO *PerlIO_importFILE(FILE *stdio, int flags); + FILE *PerlIO_exportFILE(PerlIO *f, int flags); + FILE *PerlIO_findFILE(PerlIO *f); + void PerlIO_releaseFILE(PerlIO *f,FILE *stdio); + + int PerlIO_apply_layers(PerlIO *f, const char *mode, const 
char *layers); + int PerlIO_binmode(PerlIO *f, int ptype, int imode, const char *layers); + void PerlIO_debug(const char *fmt,...) =head1 DESCRIPTION -Perl's source code should use the above functions instead of those -defined in ANSI C's I. The perl headers will C<#define> them to -the I/O mechanism selected at Configure time. +Perl's source code, and extensions that want maximum portability, should use the above +functions instead of those defined in ANSI C's I. The perl headers (in +particular "perlio.h") will C<#define> them to the I/O mechanism selected at Configure time. The functions are modeled on those in I, but parameter order has been "tidied up a little". +C takes the place of FILE *. Like FILE * it should be treated as +opaque (it is probably safe to assume it is a pointer to something). + +There are currently three implementations: + =over 4 -=item B +=item 1. USE_STDIO -This takes the place of FILE *. Like FILE * it should be treated as -opaque (it is probably safe to assume it is a pointer to something). +All above are #define'd to stdio functions or are trivial wrapper functions which +call stdio. In this case I PerlIO * is a FILE *. +This has been the default implementation since the abstraction was introduced +in perl5.003_02. + +=item 2. USE_SFIO + +A "legacy" implementation in terms of the "sfio" library. Used for some specialist +applications on Unix machines ("sfio" is not widely ported away from Unix). +Most of above are #define'd to the sfio functions. PerlIO * is in this case Sfio_t *. + +=item 3. USE_PERLIO + +Introduced just after perl5.7.0 this is a re-implementation of the above abstraction +which allows perl more control over how IO is done as it decouples IO from the +way the operating system and C library choose to do things. For USE_PERLIO +PerlIO * has an extra layer of indirection - it is a pointer-to-a-pointer. +This allows the PerlIO * to remain with a known value while swapping the +implementation arround underneath I. 
In this case all the +above are true (but very simple) functions which call the underlying implementation. + +This is the only implementation for which C<PerlIO_apply_layers()> does anything +"interesting". + +The USE_PERLIO implementation is described in L<perliol>. + +=back + +Because "perlio.h" is a thin layer (for efficiency) the semantics of these functions are +somewhat dependent on the underlying implementation. Where these variations are +understood they are noted below. + +Unless otherwise noted functions return 0 on success, or a negative value (usually +C<EOF> which is usually -1) and set C<errno> on error. + +=over 4 =item B, B, B @@ -81,6 +131,19 @@ values. =item B, B These correspond to fopen()/fdopen() arguments are the same. +Returns C<NULL> and sets C<errno> if there is an error. +There may be an implementation limit on the number of open handles, which may +be lower than the limit on the number of open files - C<errno> may +not be set when C<NULL> is returned if this limit is exceeded. + +=item B + +Perl prefers to C<dup> the new low-level descriptor to the descriptor used +by the existing PerlIO. This may become the behaviour of this function +in the future. =item B, B @@ -95,10 +158,16 @@ so it is (currently) legal to use C in perl sources. These correspond to fread() and fwrite(). Note that arguments are different, there is only one "count" and order has -"file" first. +"file" first. Returns a byte count if successful (which may be zero), +returns negative value and sets C<errno> on error. +Depending on implementation C<errno> may be C<EINTR> if operation +was interrupted by a signal. =item B +Depending on implementation C<errno> may be C<EINTR> if operation +was interrupted by a signal. + =item B, B These correspond to fputs() and fputc(). @@ -108,56 +177,103 @@ Note that arguments have been revised to have "file" first. This corresponds to ungetc(). Note that arguments have been revised to have "file" first. +Arranges that next read operation will return the byte B<ch>. 
+Despite the implied "character" in the name only values in the +range 0..0xFF are defined. Returns the byte B<ch> on success or -1 (C<EOF>) on error. +The number of bytes that can be "pushed back" may vary, only 1 character is +certain, and then only if it is the last character that was read from the handle. =item B This corresponds to getc(). +Despite the c in the name only byte range 0..0xFF is supported. +Returns the character read or -1 (C<EOF>) on error. =item B This corresponds to feof(). +Returns a true/false indication of whether the handle is at end of file. +For terminal devices this may or may not be "sticky" depending on the implementation. +The flag is cleared by PerlIO_seek(), or PerlIO_rewind(). =item B This corresponds to ferror(). +Returns a true/false indication of whether there has been an IO error on the handle. =item B This corresponds to fileno(), note that on some platforms, -the meaning of "fileno" may not match Unix. +the meaning of "fileno" may not match Unix. Returns -1 if the handle has no +open descriptor associated with it. =item B -This corresponds to clearerr(), i.e., clears 'eof' and 'error' -flags for the "stream". +This corresponds to clearerr(), i.e., clears 'error' and (usually) 'eof' +flags for the "stream". Does not return a value. =item B This corresponds to fflush(). +Sends any buffered write data to the underlying file. +If called with C<NULL> this may flush all open streams (or core dump). +Calling on a handle open for read only, or on which last operation was a read of some kind +may lead to undefined behaviour. -=item B +=item B -This corresponds to ftell(). +This corresponds to fseek(). +Sends buffered write data to the underlying file, or discards any buffered +read data, then positions the file descriptor as specified by B<offset> and B<whence> (sic). +This is the correct thing to do when switching between read and write on the same +handle (see issues with PerlIO_flush() above). 
+Offset is of type C<Off_t> which is a perl Configure value which may not be same +as stdio's C<off_t>. -=item B +=item B -This corresponds to fseek(). +This corresponds to ftell(). +Returns the current file position, or (Off_t) -1 on error. +May just return value system "knows" without making a system call or checking +the underlying file descriptor (so use on shared file descriptors is not +safe without a PerlIO_seek()). Return value is of type C<Off_t> which is a perl Configure +value which may not be same as stdio's C<off_t>. =item B, B -These correspond to fgetpos() and fsetpos(). If platform does not -have the stdio calls then they are implemented in terms of PerlIO_tell() -and PerlIO_seek(). +These correspond (loosely) to fgetpos() and fsetpos(). Rather than stdio's Fpos_t +they expect a "Perl Scalar Value" to be passed. What is stored there should +be considered opaque. The layout of the data may vary from handle to handle. +When not using stdio or if platform does not have the stdio calls then they are +implemented in terms of PerlIO_tell() and PerlIO_seek(). =item B -This corresponds to rewind(). Note may be redefined -in terms of PerlIO_seek() at some point. +This corresponds to rewind(). It is usually defined as being + + PerlIO_seek(f,(Off_t)0L, SEEK_SET); + PerlIO_clearerr(f); + =item B This corresponds to tmpfile(), i.e., returns an anonymous -PerlIO which will automatically be deleted when closed. +PerlIO or NULL on error. +The system will attempt to automatically delete the file when closed. +On Unix the file is usually C<unlink>-ed just after +it is created so it does not matter how it gets closed. On other systems the file may +only be deleted if closed via PerlIO_close() and/or the program exits via C<exit>. +Depending on the implementation there may be "race conditions" which allow other +processes access to the file, though in general it will be safer in this regard +than ad hoc schemes. + +=item B + +This corresponds to setlinebuf(). +Does not return a value. 
What constitutes a "line" is implementation +dependent but usually means that writing "\n" flushes the buffer. +What happens with things like "this\nthat" is uncertain. +(Perl core uses it I<only> when "dumping"; it has nothing to do with $| auto-flush.) =back @@ -165,9 +281,19 @@ PerlIO which will automatically be deleted when closed. There is outline support for co-existence of PerlIO with stdio. Obviously if PerlIO is implemented in terms of stdio there is -no problem. However if perlio is implemented on top of (say) sfio -then mechanisms must exist to create a FILE * which can be passed -to library code which is going to use stdio calls. +no problem. However in other cases mechanisms must exist to create a FILE * +which can be passed to library code which is going to use stdio calls. + +The first step is to add this line: + + #define PERLIO_NOT_STDIO 0 + +I<before> including any perl header files. (This will probably become the +default at some point). That prevents "perlio.h" from attempting to +#define stdio functions onto PerlIO functions. + +XS code is probably better using "typemap" if it expects FILE * arguments. +The standard typemap will be adjusted to comprehend any changes in this area. =over 4 @@ -176,6 +302,10 @@ to library code which is going to use stdio calls. Used to get a PerlIO * from a FILE *. May need additional arguments, interface under review. +The flags argument was meant to be used for read vs write vs read/write +information. In hindsight it would have been better to make it a char *mode +as in fopen/freopen. + =item B Given an PerlIO * return a 'native' FILE * suitable for @@ -198,77 +328,158 @@ of FILE * is complete. It is removed from list of 'exported' FILE *s, and associated PerlIO * should revert to original behaviour. -=item B - -This corresponds to setlinebuf(). Use is deprecated pending -further discussion. (Perl core uses it I<only> when "dumping"; -it has nothing to do with $| auto-flush.) 
- =back -In addition to user API above there is an "implementation" interface +=head2 "Fast gets" Functions + +In addition to standard-like API defined so far above there is an "implementation" interface which allows perl to get at internals of PerlIO. The following calls correspond to the various FILE_xxx macros determined -by Configure. This section is really of interest to only those -concerned with detailed perl-core behaviour or implementing a -PerlIO mapping. +by Configure - or their equivalent in other implementations. This section is really of +interest to only those concerned with detailed perl-core behaviour, implementing a +PerlIO mapping or writing code which can make use of the "read ahead" that has been done by +the IO system in the same way perl does. Note that any code that uses these interfaces +must be prepared to do things the traditional way if a handle does not support +them. =over 4 -=item B - -Implementation can return pointer to current position in the "buffer" and -a count of bytes available in the buffer. +=item B -=item B +Returns true if implementation has all the interfaces required to +allow perl's C to "bypass" normal IO mechanism. +This can vary from handle to handle. -Return pointer to next readable byte in buffer. + PerlIO_fast_gets(f) = PerlIO_has_cntptr(f) && \ + PerlIO_canset_cnt(f) && \ + `Can set pointer into buffer' -=item B -Return count of readable bytes in the buffer. +=item B -=item B +Implementation can return pointer to current position in the "buffer" and +a count of bytes available in the buffer. +Do not use this - use PerlIO_fast_gets. -Implementation can adjust its idea of number of -bytes in the buffer. +=item B -=item B +Return count of readable bytes in the buffer. Zero or negative return means +no more bytes available. -Implementation has all the interfaces required to -allow perl's fast code to handle mechanism. 
+=item B - PerlIO_fast_gets(f) = PerlIO_has_cntptr(f) && \ - PerlIO_canset_cnt(f) && \ - `Can set pointer into buffer' +Return pointer to next readable byte in buffer, accessing via the pointer +(dereferencing) is only safe if PerlIO_get_cnt() has returned a positive value. +Only positive offsets up to value returned by PerlIO_get_cnt() are allowed. =item B Set pointer into buffer, and a count of bytes still in the buffer. Should be used only to set pointer to within range implied by previous calls -to C and C. +to C and C. The two values I be consistent +with each other (implementartion may only use one or the other or may require both). + +=item B + +Implementation can adjust its idea of number of bytes in the buffer. +Do not use this - use PerlIO_fast_gets. =item B Obscure - set count of bytes in the buffer. Deprecated. +Only usable if PerlIO_canset_cnt() returns true. Currently used in only doio.c to force count < -1 to -1. Perhaps should be PerlIO_set_empty or similar. This call may actually do nothing if "count" is deduced from pointer and a "limit". +Do not use this - use PerlIO_set_ptrcnt(). =item B -Implementation has a buffer, and can return pointer +Returns true if implementation has a buffer, and can return pointer to whole buffer and its size. Used by perl for B<-T> / B<-B> tests. Other uses would be very obscure... =item B -Return I of buffer. +Return I of buffer. Access only positive offsets in the buffer +up to the value returned by PerlIO_get_bufsiz(). =item B -Return I of buffer. +Return the I in the buffer, this is neither the number +that can be read, nor the amount of memory allocated to the buffer. Rather +it is what the operating system and/or implementation happened to C +(or whatever) last time IO was requested. + +=back + +=head2 Other Functions + +=over 4 + +=item PerlIO_apply_layers(f,mode,layers) + +The new interface to the USE_PERLIO implementation. 
The layers ":crlf" +and ":raw" are the only ones allowed for other implementations and those +are silently ignored. Use PerlIO_binmode() below for the portable +case. + +=item PerlIO_binmode(f,ptype,imode,layers) + +The hook used by perl's C<binmode> operator. +B<ptype> is perl's character for the kind of IO: + +=over 8 + +=item '<' read + +=item '>' write + +=item '+' read/write + +=back + +B<imode> is C<O_BINARY> or C<O_TEXT>. + +B<layers> is a string of layers to apply, only ":raw" or ":crlf" make +sense in the non USE_PERLIO case. + +Portable cases are: + + PerlIO_binmode(f,ptype,O_BINARY,":raw"); +and + PerlIO_binmode(f,ptype,O_TEXT,":crlf"); + +On Unix these calls probably have no effect whatsoever. +Elsewhere they alter "\n" to CR,LF translation and possibly cause a special +text "end of file" indicator to be written or honoured on read. The effect of +making the call after doing any IO to the handle depends on the implementation. (It may be +ignored, affect any data which is already buffered as well, or only apply +to subsequent data.) + +=item PerlIO_debug(fmt,...) + +PerlIO_debug is a printf()-like function which can be used for debugging. +No return value. Its main use is inside PerlIO where using real printf, warn() etc. would +recursively call PerlIO and be a problem. + +PerlIO_debug writes to the file named by $ENV{'PERLIO_DEBUG'}; typical use +might be + + + Bourne shells: + PERLIO_DEBUG=/dev/tty ./perl somescript some args + + Csh: + setenv PERLIO_DEBUG /dev/tty + ./perl somescript some args + + Win32: + set PERLIO_DEBUG=CON + perl somescript some args + +If $ENV{'PERLIO_DEBUG'} is not set PerlIO_debug() is a no-op. =back diff --git a/pod/perliol.pod b/pod/perliol.pod new file mode 100644 index 0000000..6169d43 --- /dev/null +++ b/pod/perliol.pod @@ -0,0 +1,513 @@ + +=head1 NAME + +perliol - C API for Perl's implementation of IO in Layers. + +=head1 SYNOPSIS + + /* Defining a layer ... 
*/ + #include + + +=head1 DESCRIPTION + +This document describes the behavior and implementation of the PerlIO abstraction +described in L<perlapio> when C<USE_PERLIO> is defined (and C<USE_SFIO> is not). + +=head2 History and Background + +The PerlIO abstraction was introduced in perl5.003_02 but languished as just +an abstraction until perl5.7.0. However during that time a number of perl extensions +switched to using it, so the API is mostly fixed to maintain (source) compatibility. + +The aim of the implementation is to provide the PerlIO API in a flexible and +platform neutral manner. It is also a trial of an "Object Oriented C, with vtables" +approach which may be applied to perl6. + +=head2 Layers vs Disciplines + +Initial discussion of the ability to modify IO streams' behaviour used the term +"discipline" for the entities which were added. This came (I believe) from the use +of the term in "sfio", which in turn borrowed it from "line disciplines" on Unix +terminals. However, this document (and the C code) uses the term "layer". +This is I hope a natural term given the implementation, and should avoid connotations +that are inherent in earlier uses of "discipline" for things which are rather different. + +=head2 Data Structures + +The basic data structure is a PerlIOl: + + typedef struct _PerlIO PerlIOl; + typedef struct _PerlIO_funcs PerlIO_funcs; + typedef PerlIOl *PerlIO; + + struct _PerlIO + { + PerlIOl * next; /* Lower layer */ + PerlIO_funcs * tab; /* Functions for this layer */ + IV flags; /* Various flags for state */ + }; + +A PerlIOl * is a pointer to the struct, and the I<application> level PerlIO * +is a pointer to a PerlIOl * - i.e. a pointer to a pointer to the struct. +This allows the application level PerlIO * to remain constant while the actual +PerlIOl * underneath changes. (Compare perl's SV * which remains constant +while its sv_any field changes as the scalar's type changes.) +An IO stream is then in general represented as a pointer to this linked-list +of "layers". 
+ +It should be noted that because of the double indirection in a PerlIO *, +a &(perlio->next) "is" a PerlIO *, and so to some degree at least +one layer can use the "standard" API on the next layer down. + +A "layer" is composed of two parts: + +=over 4 + +=item 1. The functions and attributes of the "layer class". + +=item 2. The per-instance data for a particular handle. + +=back + +=head2 Functions and Attributes + +The functions and attributes are accessed via the "tab" (for table) member of +PerlIOl. The functions (methods of the layer "class") are fixed, and are defined by the +PerlIO_funcs type. They are broadly the same as the public PerlIO_xxxxx functions: + + struct _PerlIO_funcs + { + char * name; + Size_t size; + IV kind; + IV (*Fileno)(PerlIO *f); + PerlIO * (*Fdopen)(PerlIO_funcs *tab, int fd, const char *mode); + PerlIO * (*Open)(PerlIO_funcs *tab, const char *path, const char *mode); + int (*Reopen)(const char *path, const char *mode, PerlIO *f); + IV (*Pushed)(PerlIO *f,const char *mode,const char *arg,STRLEN len); + IV (*Popped)(PerlIO *f); + /* Unix-like functions - cf sfio line disciplines */ + SSize_t (*Read)(PerlIO *f, void *vbuf, Size_t count); + SSize_t (*Unread)(PerlIO *f, const void *vbuf, Size_t count); + SSize_t (*Write)(PerlIO *f, const void *vbuf, Size_t count); + IV (*Seek)(PerlIO *f, Off_t offset, int whence); + Off_t (*Tell)(PerlIO *f); + IV (*Close)(PerlIO *f); + /* Stdio-like buffered IO functions */ + IV (*Flush)(PerlIO *f); + IV (*Fill)(PerlIO *f); + IV (*Eof)(PerlIO *f); + IV (*Error)(PerlIO *f); + void (*Clearerr)(PerlIO *f); + void (*Setlinebuf)(PerlIO *f); + /* Perl's snooping functions */ + STDCHAR * (*Get_base)(PerlIO *f); + Size_t (*Get_bufsiz)(PerlIO *f); + STDCHAR * (*Get_ptr)(PerlIO *f); + SSize_t (*Get_cnt)(PerlIO *f); + void (*Set_ptrcnt)(PerlIO *f,STDCHAR *ptr,SSize_t cnt); + }; + +The first few members of the struct give a "name" for the layer, the size to C +for the per-instance data, and some flags which are 
attributes of the class as a whole +(such as whether it is a buffering layer), then follow the functions which fall into +four basic groups: + +=over 4 + +=item 1. Opening and setup functions + +=item 2. Basic IO operations + +=item 3. Stdio class buffering options. + +=item 4. Functions to support Perl's traditional "fast" access to the buffer. + +=back + +A layer does not have to implement all the functions, but the whole table has +to be present. Unimplemented slots can be NULL (which will result in an error +when called) or can be filled in with stubs to "inherit" behaviour from +a "base class". This "inheritance" is fixed for all instances of the layer, +but as the layer chooses which stubs to populate the table, limited +"multiple inheritance" is possible. + +=head2 Per-instance Data + +The per-instance data are held in memory beyond the basic PerlIOl struct, +by making a PerlIOl the first member of the layer's struct thus: + + typedef struct + { + struct _PerlIO base; /* Base "class" info */ + STDCHAR * buf; /* Start of buffer */ + STDCHAR * end; /* End of valid part of buffer */ + STDCHAR * ptr; /* Current position in buffer */ + Off_t posn; /* Offset of buf into the file */ + Size_t bufsiz; /* Real size of buffer */ + IV oneword; /* Emergency buffer */ + } PerlIOBuf; + +In this way (as for perl's scalars) a pointer to a PerlIOBuf can be treated +as a pointer to a PerlIOl. + +=head2 Layers in action. + + table perlio unix + | | + +-----------+ +----------+ +--------+ + PerlIO ->| |--->| next |--->| NULL | + +-----------+ +----------+ +--------+ + | | | buffer | | fd | + +-----------+ | | +--------+ + | | +----------+ + + +The above attempts to show how the layer scheme works in a simple case. +The application's PerlIO * points to an entry in the table(s) representing open +(allocated) handles. For example the first three slots in the table correspond +to C<stdin>, C<stdout> and C<stderr>. 
The table in turn points to the current +"top" layer for the handle - in this case an instance of the generic buffering +layer "perlio". That layer in turn points to the next layer down - in this +case the low-level "unix" layer. + +The above is roughly equivalent to a "stdio" buffered stream, but with much more +flexibility: + +=over 4 + +=item * + +If Unix level read/write/lseek is not appropriate for (say) sockets then +the "unix" layer can be replaced (at open time or even dynamically) with a +"socket" layer. + +=item * + +Different handles can have different buffering schemes. The "top" layer +could be the "mmap" layer if reading disk files was quicker using C<mmap> +than C<read>. An "unbuffered" stream can be implemented simply by +not having a buffer layer. + +=item * + +Extra layers can be inserted to process the data as it flows through. +This was the driving need for including the scheme in perl5.7.0+ - we needed a mechanism +to allow data to be translated between perl's internal encoding (conceptually +at least Unicode as UTF-8), and the "native" format used by the system. +This is provided by the ":encoding(xxxx)" layer which typically sits above +the buffering layer. + +=item * + +A layer can be added that does "\n" to CRLF translation. This layer can be used +on any platform, not just those that normally do such things. + +=back + +=head2 Per-instance flag bits + +The generic flag bits are a hybrid of O_XXXXX style flags deduced from +the mode string passed to PerlIO_open() and state bits for typical buffer +layers. + +=over 4 + +=item PERLIO_F_EOF + +End of file. + +=item PERLIO_F_CANWRITE + +Writes are permitted i.e. opened as "w" or "r+" or "a", etc. + +=item PERLIO_F_CANREAD + +Reads are permitted i.e. opened "r" or "w+" (or even "a+" - ick). + +=item PERLIO_F_ERROR + +An error has occurred (for PerlIO_error()) + +=item PERLIO_F_TRUNCATE + +Truncate file suggested by open mode. + +=item PERLIO_F_APPEND + +All writes should be appends. 
+ +=item PERLIO_F_CRLF + +Layer is performing Win32-like "\n" => CR,LF for output and CR,LF => "\n" for +input. Normally the provided "crlf" layer is the only layer that need bother about +this. PerlIO_binmode() will mess with this flag rather than add/remove layers +if the PERLIO_K_CANCRLF bit is set for the layer's class. + +=item PERLIO_F_UTF8 + +Data written to this layer should be UTF-8 encoded, data provided +by this layer should be considered UTF-8 encoded. Can be set on any layer +by ":utf8" dummy layer. Also set on ":encoding" layer. + +=item PERLIO_F_UNBUF + +Layer is unbuffered - i.e. write to next layer down should occur for +each write to this layer. + +=item PERLIO_F_WRBUF + +The buffer for this layer currently holds data written to it but not sent +to next layer. + +=item PERLIO_F_RDBUF + +The buffer for this layer currently holds unconsumed data read from +layer below. + +=item PERLIO_F_LINEBUF + +Layer is line buffered. Write data should be passed to next layer down whenever a +"\n" is seen. Any data beyond the "\n" should then be processed. + +=item PERLIO_F_TEMP + +File has been unlink()ed, or should be deleted on close(). + +=item PERLIO_F_OPEN + +Handle is open. + +=item PERLIO_F_FASTGETS + +This instance of this layer supports the "fast gets" interface. +Normally set based on PERLIO_K_FASTGETS for the class and by the +existence of the function(s) in the table. However a class that +normally provides that interface may need to avoid it on a +particular instance. The "pending" layer needs to do this when +it is pushed above a layer which does not support the interface. +(Perl's sv_gets() does not expect the stream's fast gets behaviour +to change during one "get".) + +=back + +=head2 Methods in Detail + +=over 4 + +=item IV (*Fileno)(PerlIO *f); + +Returns the Unix/Posix numeric file descriptor for the handle. +Normally PerlIOBase_fileno() (which just asks next layer down) will suffice for this. 
+ +=item PerlIO * (*Fdopen)(PerlIO_funcs *tab, int fd, const char *mode); + +Should (perhaps indirectly) call PerlIO_allocate() to allocate a slot +in the table and associate it with the given numeric file descriptor, +which will be open in a manner compatible with the supplied mode string. + +=item PerlIO * (*Open)(PerlIO_funcs *tab, const char *path, const char *mode); + +Should attempt to open the given path and if that succeeds then (perhaps indirectly) +call PerlIO_allocate() to allocate a slot in the table and associate it with the +layer's information for the opened file. + +=item int (*Reopen)(const char *path, const char *mode, PerlIO *f); + +Re-open the supplied PerlIO * to connect it to C<path> in C<mode>. Returns a success flag. +Perl does not use this and L<perlapio> marks it as subject to change. + +=item IV (*Pushed)(PerlIO *f,const char *mode,const char *arg,STRLEN len); + +Called when the layer is pushed onto the stack. The C<mode> argument may be NULL if this +occurs post-open. The C<arg> and C<len> will be present if an argument string was +passed. In most cases this should call PerlIOBase_pushed() to convert C<mode> into +the appropriate PERLIO_F_XXXXX flags in addition to any actions the layer itself takes. + +=item IV (*Popped)(PerlIO *f); + +Called when the layer is popped from the stack. A layer will normally be popped after +Close() is called. But a layer can be popped without being closed if the program +is dynamically managing layers on the stream. In such cases Popped() should free +any resources (buffers, translation tables, ...) not held directly in the layer's +struct. + +=item SSize_t (*Read)(PerlIO *f, void *vbuf, Size_t count); + +Basic read operation. Returns actual bytes read, or -1 on an error. +Typically will call Fill and manipulate pointers (possibly via the API). +PerlIOBuf_read() may be suitable for derived classes which provide "fast gets" methods. + +=item SSize_t (*Unread)(PerlIO *f, const void *vbuf, Size_t count); + +A superset of stdio's ungetc(). 
Should arrange for future reads to see the bytes in C<vbuf>. +If there is no obviously better implementation then PerlIOBase_unread() provides +the function by pushing a "fake" "pending" layer above the calling layer. + +=item SSize_t (*Write)(PerlIO *f, const void *vbuf, Size_t count); + +Basic write operation. Returns bytes written or -1 on an error. + +=item IV (*Seek)(PerlIO *f, Off_t offset, int whence); + +Position the file pointer. Should normally call its own Flush method and +then the Seek method of next layer down. + +=item Off_t (*Tell)(PerlIO *f); + +Return the file pointer. May be based on layer's cached concept of position to +avoid overhead. + +=item IV (*Close)(PerlIO *f); + +Close the stream. Should normally call PerlIOBase_close() to flush itself +and Close layers below and then deallocate any data structures (buffers, translation +tables, ...) not held directly in the data structure. + +=item IV (*Flush)(PerlIO *f); + +Should make stream's state consistent with layers below. That is any +buffered write data should be written, and file position of lower layer +adjusted for data read from below but not actually consumed. + +=item IV (*Fill)(PerlIO *f); + +The buffer for this layer should be filled (for read) from layer below. + +=item IV (*Eof)(PerlIO *f); + +Return end-of-file indicator. PerlIOBase_eof() is normally sufficient. + +=item IV (*Error)(PerlIO *f); + +Return error indicator. PerlIOBase_error() is normally sufficient. + +=item void (*Clearerr)(PerlIO *f); + +Clear end-of-file and error indicators. Should call PerlIOBase_clearerr() +to set the PERLIO_F_XXXXX flags, which may suffice. + +=item void (*Setlinebuf)(PerlIO *f); + +Mark the stream as line buffered. + +=item STDCHAR * (*Get_base)(PerlIO *f); + +Allocate (if not already done so) the read buffer for this layer and +return pointer to it. + +=item Size_t (*Get_bufsiz)(PerlIO *f); + +Return the number of bytes that last Fill() put in the buffer. 
+ +=item STDCHAR * (*Get_ptr)(PerlIO *f); + +Return the current read pointer relative to this layer's buffer. + +=item SSize_t (*Get_cnt)(PerlIO *f); + +Return the number of bytes left to be read in the current buffer. + +=item void (*Set_ptrcnt)(PerlIO *f,STDCHAR *ptr,SSize_t cnt); + +Adjust the read pointer and count of bytes to match C<ptr> and/or C<cnt>. +The application (or layer above) must ensure they are consistent. +(Checking is allowed by the paranoid.) + +=back + + +=head2 Core Layers + +The file C<perlio.c> provides the following layers: + +=over 4 + +=item "unix" + +A basic non-buffered layer which calls Unix/POSIX read(), write(), lseek(), close(). +No buffering. Even on platforms that distinguish between O_TEXT and O_BINARY +this layer is always O_BINARY. + +=item "perlio" + +A very complete generic buffering layer which provides the whole of PerlIO API. +It is also intended to be used as a "base class" for other layers. (For example +its Read() method is implemented in terms of the Get_cnt()/Get_ptr()/Set_ptrcnt() +methods). + +"perlio" over "unix" provides a complete replacement for stdio as seen via PerlIO API. +This is the default for USE_PERLIO when system's stdio does not permit perl's +"fast gets" access, and which do not distinguish between O_TEXT and O_BINARY. + +=item "stdio" + +A layer which provides the PerlIO API via the layer scheme, but implements it by calling +system's stdio. This is (currently) the default if system's stdio provides sufficient +access to allow perl's "fast gets" access and which do not distinguish between O_TEXT and +O_BINARY. + +=item "crlf" + +A layer derived using "perlio" as a base class. It provides Win32-like "\n" to CR,LF +translation. Can either be applied above "perlio" or serve as the buffer layer itself. +"crlf" over "unix" is the default if system distinguishes between O_TEXT and O_BINARY +opens. 
(At some point "unix" will be replaced by a "native" Win32 IO layer on that +platform, as Win32's read/write layer has various drawbacks.) +The "crlf" layer is a reasonable model for a layer which transforms data in some way. + +=item "mmap" + +If Configure detects C<mmap()> functions this layer is provided (with "perlio" as a +"base") which does "read" operations by mmap()ing the file. Performance improvement +is marginal on modern systems, so it is mainly there as a proof of concept. +It is likely to be unbundled from the core at some point. +The "mmap" layer is a reasonable model for a minimalist "derived" layer. + +=item "pending" + +An "internal" derivative of "perlio" which can be used to provide Unread() function +for layers which have no buffer or cannot be bothered. +(Basically this layer's Fill() pops itself off the stack and so resumes reading +from layer below.) + +=item "raw" + +A dummy layer which never exists on the layer stack. Instead when "pushed" it +actually pops the stack!, removing itself, and any other layers until it reaches +a layer with the class PERLIO_K_RAW bit set. + +=item "utf8" + +Another dummy layer. When pushed it pops itself and sets the PERLIO_F_UTF8 flag +on the layer which was (and now is once more) the top of the stack. + +=back + +In addition C<perlio.c> also provides a number of PerlIOBase_xxxx() functions +which are intended to be used in the table slots of classes which do not need +to do anything special for a particular method. + +=head2 Extension Layers + +Layers can be made available by extension modules. + +=over 4 + +=item "encoding" + + use Encoding; + +makes this layer available. It is an example of a layer which takes an argument, +as it is called as: + + open($fh,"<:encoding(iso-8859-7)",$pathname) + +=back + + +=cut + + +