7 * Copyright (c) 1995 Open Market, Inc.
10 * This file contains proprietary and confidential information and
11 * remains the unpublished property of Open Market, Inc. Use,
12 * disclosure, or reproduction is prohibited except as permitted by
13 * express written license agreement with Open Market, Inc.
16 * snapper@openmarket.com
20 static const char rcsid[] = "$Id: os_unix.c,v 1.5 1998/12/09 05:41:38 roberts Exp $";
25 #include "fcgiappmisc.h"
35 #include <memory.h> /* for memchr() */
39 #ifdef HAVE_SYS_SOCKET_H
40 #include <sys/socket.h> /* for getpeername */
43 #include <fcntl.h> /* for fcntl */
49 #include <sys/types.h>
50 #ifdef HAVE_NETINET_IN_H
51 #include <netinet/in.h>
53 #include <arpa/inet.h>
54 #include <netinet/tcp.h>
59 # if defined(__STDC__) || defined(__cplusplus)
60 typedef void *ClientData;
62 typedef int *ClientData;
63 # endif /* __STDC__ */
68 * This structure holds an entry for each outstanding async I/O operation.
71 OS_AsyncProc procPtr; /* callout completion procedure */
72 ClientData clientData; /* caller private data */
81 * Entries in the async I/O table are allocated 2 per file descriptor.
83 * Read Entry Index = fd * 2
84 * Write Entry Index = (fd * 2) + 1
/*
 * Map a file descriptor to its two slots in the async I/O table.
 * The argument is parenthesized so that an expression argument such as
 * AIO_RD_IX(a + b) is doubled as a whole instead of expanding to
 * a + b * 2.
 */
#define AIO_RD_IX(fd) ((fd) * 2)
#define AIO_WR_IX(fd) (((fd) * 2) + 1)
89 static int asyncIoTableSize = 16;
90 static AioInfo *asyncIoTable = NULL;
91 #define STDIN_FILENO 0
92 #define STDOUT_FILENO 1
93 #define STDERR_FILENO 2
103 static int isFastCGI = FALSE;
104 static int libInitialized = FALSE;
106 static fd_set readFdSet;
107 static fd_set writeFdSet;
109 static fd_set readFdSetPost;
110 static int numRdPosted = 0;
111 static fd_set writeFdSetPost;
112 static int numWrPosted = 0;
113 static int volatile maxFd = -1;
117 *--------------------------------------------------------------
121 * Set up the OS library for use.
123 * NOTE: This function is really only needed for application
124 * asynchronous I/O. It will most likely change in the
125 * future to setup the multi-threaded environment.
128 * Returns 0 if success, -1 if not.
131 * Async I/O table allocated and initialized.
133 *--------------------------------------------------------------
135 int OS_LibInit(int stdioFds[3])
140 asyncIoTable = malloc(asyncIoTableSize * sizeof(AioInfo));
141 if(asyncIoTable == NULL) {
145 memset((char *) asyncIoTable, 0,
146 asyncIoTableSize * sizeof(AioInfo));
149 FD_ZERO(&writeFdSet);
150 FD_ZERO(&readFdSetPost);
151 FD_ZERO(&writeFdSetPost);
152 libInitialized = TRUE;
158 *--------------------------------------------------------------
162 * Shutdown the OS library.
168 * Memory freed, fds closed.
170 *--------------------------------------------------------------
172 void OS_LibShutdown()
179 libInitialized = FALSE;
185 *----------------------------------------------------------------------
187 * OS_BuildSockAddrUn --
189 * Using the pathname bindPath, fill in the sockaddr_un structure
190 * *servAddrPtr and the length of this structure *servAddrLen.
192 * The format of the sockaddr_un structure changed incompatibly in
193 * 4.3BSD Reno. Digital UNIX supports both formats, other systems
194 * support one or the other.
197 * 0 for normal return, -1 for failure (bindPath too long).
199 *----------------------------------------------------------------------
static int OS_BuildSockAddrUn(char *bindPath,
                              struct sockaddr_un *servAddrPtr,
                              int *servAddrLen)
{
    int pathLen = strlen(bindPath);

    /*
     * Longest path accepted: the sun_len (4.3BSD Reno) layout must leave
     * one byte of sun_path unused, the older layout may fill it entirely.
     */
#ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
    const size_t pathLimit = sizeof(servAddrPtr->sun_path) - 1;
#else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
    const size_t pathLimit = sizeof(servAddrPtr->sun_path);
#endif

    if((size_t) pathLen > pathLimit) {
        return -1;
    }

    /* Start from an all-zero address, then fill in family and path. */
    memset((char *) servAddrPtr, 0, sizeof(*servAddrPtr));
    servAddrPtr->sun_family = AF_UNIX;
    memcpy(servAddrPtr->sun_path, bindPath, pathLen);

    /* Reported length covers the header fields plus the path bytes only. */
    *servAddrLen = sizeof(servAddrPtr->sun_family) + pathLen;
#ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
    *servAddrLen += sizeof(servAddrPtr->sun_len);
    servAddrPtr->sun_len = *servAddrLen;
#endif
    return 0;
}
231 union SockAddrUnion {
232 struct sockaddr_un unixVariant;
233 struct sockaddr_in inetVariant;
238 * OS_CreateLocalIpcFd --
240 * This procedure is responsible for creating the listener socket
241 * on Unix for local process communication. It will create a
242 * domain socket or a TCP/IP socket bound to "localhost" and return
243 * a file descriptor to it to the caller.
246 * Listener socket created. This call returns either a valid
247 * file descriptor or -1 on error.
252 *----------------------------------------------------------------------
254 int OS_CreateLocalIpcFd(char *bindPath)
256 int listenSock, servLen;
257 union SockAddrUnion sa;
261 char host[MAXPATHLEN];
263 strcpy(host, bindPath);
264 if((tp = strchr(host, ':')) != 0) {
266 if((port = atoi(tp)) == 0) {
272 if(tcp && (*host && strcmp(host, "localhost") != 0)) {
273 fprintf(stderr, "To start a service on a TCP port can not "
274 "specify a host name.\n"
275 "You should either use \"localhost:<port>\" or "
276 " just use \":<port>.\"\n");
281 listenSock = socket(AF_INET, SOCK_STREAM, 0);
282 if(listenSock >= 0) {
284 if(setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
285 (char *) &flag, sizeof(flag)) < 0) {
286 fprintf(stderr, "Can't set SO_REUSEADDR.\n");
291 listenSock = socket(AF_UNIX, SOCK_STREAM, 0);
298 * Bind the listening socket.
301 memset((char *) &sa.inetVariant, 0, sizeof(sa.inetVariant));
302 sa.inetVariant.sin_family = AF_INET;
303 sa.inetVariant.sin_addr.s_addr = htonl(INADDR_ANY);
304 sa.inetVariant.sin_port = htons(port);
305 servLen = sizeof(sa.inetVariant);
308 if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
309 fprintf(stderr, "Listening socket's path name is too long.\n");
313 if(bind(listenSock, (struct sockaddr *) &sa.unixVariant, servLen) < 0
314 || listen(listenSock, 5) < 0) {
315 perror("bind/listen");
324 *----------------------------------------------------------------------
328 * Create the socket and connect to the remote application if
331 * This was lifted from the cgi-fcgi application and was abstracted
332 * out because Windows NT does not have a domain socket and must
333 * use a named pipe which has a different API altogether.
336 * -1 if fail or a valid file descriptor if connection succeeds.
339 * Remote connection established.
341 *----------------------------------------------------------------------
343 int OS_FcgiConnect(char *bindPath)
345 union SockAddrUnion sa;
346 int servLen, resultSock;
349 char host[MAXPATHLEN];
353 strcpy(host, bindPath);
354 if((tp = strchr(host, ':')) != 0) {
356 if((port = atoi(tp)) == 0) {
364 if((hp = gethostbyname((*host ? host : "localhost"))) == NULL) {
365 fprintf(stderr, "Unknown host: %s\n", bindPath);
368 sa.inetVariant.sin_family = AF_INET;
369 memcpy(&sa.inetVariant.sin_addr, hp->h_addr, hp->h_length);
370 sa.inetVariant.sin_port = htons(port);
371 servLen = sizeof(sa.inetVariant);
372 resultSock = socket(AF_INET, SOCK_STREAM, 0);
374 if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
375 fprintf(stderr, "Listening socket's path name is too long.\n");
378 resultSock = socket(AF_UNIX, SOCK_STREAM, 0);
381 assert(resultSock >= 0);
382 connectStatus = connect(resultSock, (struct sockaddr *) &sa.unixVariant,
384 if(connectStatus >= 0) {
388 * Most likely (errno == ENOENT || errno == ECONNREFUSED)
389 * and no FCGI application server is running.
398 *--------------------------------------------------------------
402 * Pass through to the unix read function.
405 * Returns number of bytes read, 0, or -1 failure: errno
406 * contains actual error.
411 *--------------------------------------------------------------
int OS_Read(int fd, char * buf, size_t len)
{
    /* Thin wrapper: hand the request straight to read(2). */
    int bytesRead = read(fd, buf, len);

    return bytesRead;
}
419 *--------------------------------------------------------------
423 * Pass through to unix write function.
426 * Returns number of bytes written, 0, or -1 failure: errno
427 * contains actual error.
432 *--------------------------------------------------------------
int OS_Write(int fd, char * buf, size_t len)
{
    /* Thin wrapper: hand the request straight to write(2). */
    int bytesWritten = write(fd, buf, len);

    return bytesWritten;
}
441 *----------------------------------------------------------------------
445 * Spawns a new FastCGI listener process.
448 * 0 if success, -1 if error.
451 * Child process spawned.
453 *----------------------------------------------------------------------
455 int OS_SpawnChild(char *appPath, int listenFd)
464 if(forkResult == 0) {
466 * Close STDIN unconditionally. It's used by the parent
467 * process for CGI communication. The FastCGI application
468 * will be replacing this with the FastCGI listenFd IF
469 * STDIN_FILENO is the same as FCGI_LISTENSOCK_FILENO
470 * (which it is on Unix). Regardless, STDIN, STDOUT, and
471 * STDERR will be closed as the FastCGI process uses a
472 * multiplexed socket in their place.
477 * If the listenFd is already the value of FCGI_LISTENSOCK_FILENO
478 * we're set. If not, change it so the child knows where to
479 * get the listen socket from.
481 if(listenFd != FCGI_LISTENSOCK_FILENO) {
482 dup2(listenFd, FCGI_LISTENSOCK_FILENO);
486 close(STDOUT_FILENO);
487 close(STDERR_FILENO);
490 * We're a child. Exec the application.
492 * XXX: entire environment passes through
494 execl(appPath, appPath, NULL);
496 * XXX: Can't do this as we've already closed STDERR!!!
507 *--------------------------------------------------------------
509 * OS_AsyncReadStdin --
511 * This initiates an asynchronous read on the standard
514 * The abstraction is necessary because Windows NT does not
515 * have a clean way of "select"ing a file descriptor for
519 * -1 if error, 0 otherwise.
522 * Asynchronous bit is set in the readfd variable and
523 * request is enqueued.
525 *--------------------------------------------------------------
527 int OS_AsyncReadStdin(void *buf, int len, OS_AsyncProc procPtr,
528 ClientData clientData)
530 int index = AIO_RD_IX(STDIN_FILENO);
532 ASSERT(asyncIoTable[index].inUse == 0);
533 asyncIoTable[index].procPtr = procPtr;
534 asyncIoTable[index].clientData = clientData;
535 asyncIoTable[index].fd = STDIN_FILENO;
536 asyncIoTable[index].len = len;
537 asyncIoTable[index].offset = 0;
538 asyncIoTable[index].buf = buf;
539 asyncIoTable[index].inUse = 1;
540 FD_SET(STDIN_FILENO, &readFdSet);
541 if(STDIN_FILENO > maxFd)
542 maxFd = STDIN_FILENO;
546 static void GrowAsyncTable(void)
548 int oldTableSize = asyncIoTableSize;
550 asyncIoTableSize = asyncIoTableSize * 2;
551 asyncIoTable = realloc(asyncIoTable, asyncIoTableSize * sizeof(AioInfo));
552 if(asyncIoTable == NULL) {
556 memset((char *) &asyncIoTable[oldTableSize], 0,
557 oldTableSize * sizeof(AioInfo));
563 *--------------------------------------------------------------
567 * This initiates an asynchronous read on the file
568 * handle which may be a socket or named pipe.
570 * We also must save the ProcPtr and ClientData, so later
571 * when the io completes, we know who to call.
573 * We don't look at any results here (the ReadFile may
574 * return data if it is cached) but do all completion
575 * processing in OS_Select when we get the io completion
576 * port done notifications. Then we call the callback.
579 * -1 if error, 0 otherwise.
582 * Asynchronous I/O operation is queued for completion.
584 *--------------------------------------------------------------
586 int OS_AsyncRead(int fd, int offset, void *buf, int len,
587 OS_AsyncProc procPtr, ClientData clientData)
589 int index = AIO_RD_IX(fd);
591 ASSERT(asyncIoTable != NULL);
596 if(index >= asyncIoTableSize) {
600 ASSERT(asyncIoTable[index].inUse == 0);
601 asyncIoTable[index].procPtr = procPtr;
602 asyncIoTable[index].clientData = clientData;
603 asyncIoTable[index].fd = fd;
604 asyncIoTable[index].len = len;
605 asyncIoTable[index].offset = offset;
606 asyncIoTable[index].buf = buf;
607 asyncIoTable[index].inUse = 1;
608 FD_SET(fd, &readFdSet);
613 *--------------------------------------------------------------
617 * This initiates an asynchronous write on the "fake" file
618 * descriptor (which may be a file, socket, or named pipe).
619 * We also must save the ProcPtr and ClientData, so later
620 * when the io completes, we know who to call.
622 * We don't look at any results here (the WriteFile generally
623 * completes immediately) but do all completion processing
624 * in OS_DoIo when we get the io completion port done
625 * notifications. Then we call the callback.
628 * -1 if error, 0 otherwise.
631 * Asynchronous I/O operation is queued for completion.
633 *--------------------------------------------------------------
635 int OS_AsyncWrite(int fd, int offset, void *buf, int len,
636 OS_AsyncProc procPtr, ClientData clientData)
638 int index = AIO_WR_IX(fd);
643 if(index >= asyncIoTableSize) {
647 ASSERT(asyncIoTable[index].inUse == 0);
648 asyncIoTable[index].procPtr = procPtr;
649 asyncIoTable[index].clientData = clientData;
650 asyncIoTable[index].fd = fd;
651 asyncIoTable[index].len = len;
652 asyncIoTable[index].offset = offset;
653 asyncIoTable[index].buf = buf;
654 asyncIoTable[index].inUse = 1;
655 FD_SET(fd, &writeFdSet);
660 *--------------------------------------------------------------
664 * Closes the descriptor. This is a pass through to the
668 * 0 for success, -1 on failure
673 *--------------------------------------------------------------
677 int index = AIO_RD_IX(fd);
679 FD_CLR(fd, &readFdSet);
680 FD_CLR(fd, &readFdSetPost);
681 if(asyncIoTable[index].inUse != 0) {
682 asyncIoTable[index].inUse = 0;
685 FD_CLR(fd, &writeFdSet);
686 FD_CLR(fd, &writeFdSetPost);
687 index = AIO_WR_IX(fd);
688 if(asyncIoTable[index].inUse != 0) {
689 asyncIoTable[index].inUse = 0;
697 *--------------------------------------------------------------
701 * Cancel outstanding asynchronous reads and prevent subsequent
702 * reads from completing.
705 * Socket or file is shutdown. Return values mimic Unix shutdown:
706 * 0 success, -1 failure
708 *--------------------------------------------------------------
710 int OS_CloseRead(int fd)
712 if(asyncIoTable[AIO_RD_IX(fd)].inUse != 0) {
713 asyncIoTable[AIO_RD_IX(fd)].inUse = 0;
714 FD_CLR(fd, &readFdSet);
717 return shutdown(fd, 0);
722 *--------------------------------------------------------------
726 * This function was formerly OS_Select. Its purpose is
727 * to pull I/O completion events off the queue and dispatch
728 * them to the appropriate place.
734 * Handlers are called.
736 *--------------------------------------------------------------
738 int OS_DoIo(struct timeval *tmo)
740 int fd, len, selectStatus;
741 OS_AsyncProc procPtr;
742 ClientData clientData;
745 fd_set writeFdSetCpy;
747 FD_ZERO(&readFdSetCpy);
748 FD_ZERO(&writeFdSetCpy);
750 for(fd = 0; fd <= maxFd; fd++) {
751 if(FD_ISSET(fd, &readFdSet)) {
752 FD_SET(fd, &readFdSetCpy);
754 if(FD_ISSET(fd, &writeFdSet)) {
755 FD_SET(fd, &writeFdSetCpy);
760 * If there were no completed events from a prior call, see if there's
763 if(numRdPosted == 0 && numWrPosted == 0) {
764 selectStatus = select((maxFd+1), &readFdSetCpy, &writeFdSetCpy,
766 if(selectStatus < 0) {
770 for(fd = 0; fd <= maxFd; fd++) {
772 * Build up a list of completed events. We'll work off of
773 * this list as opposed to looping through the read and write
774 * fd sets since they can be affected by a callback routine.
776 if(FD_ISSET(fd, &readFdSetCpy)) {
778 FD_SET(fd, &readFdSetPost);
779 FD_CLR(fd, &readFdSet);
782 if(FD_ISSET(fd, &writeFdSetCpy)) {
784 FD_SET(fd, &writeFdSetPost);
785 FD_CLR(fd, &writeFdSet);
790 if(numRdPosted == 0 && numWrPosted == 0)
793 for(fd = 0; fd <= maxFd; fd++) {
795 * Do reads and dispatch callback.
797 if(FD_ISSET(fd, &readFdSetPost)
798 && asyncIoTable[AIO_RD_IX(fd)].inUse) {
801 FD_CLR(fd, &readFdSetPost);
802 aioPtr = &asyncIoTable[AIO_RD_IX(fd)];
804 len = read(aioPtr->fd, aioPtr->buf, aioPtr->len);
806 procPtr = aioPtr->procPtr;
807 aioPtr->procPtr = NULL;
808 clientData = aioPtr->clientData;
811 (*procPtr)(clientData, len);
815 * Do writes and dispatch callback.
817 if(FD_ISSET(fd, &writeFdSetPost) &&
818 asyncIoTable[AIO_WR_IX(fd)].inUse) {
821 FD_CLR(fd, &writeFdSetPost);
822 aioPtr = &asyncIoTable[AIO_WR_IX(fd)];
824 len = write(aioPtr->fd, aioPtr->buf, aioPtr->len);
826 procPtr = aioPtr->procPtr;
827 aioPtr->procPtr = NULL;
828 clientData = aioPtr->clientData;
830 (*procPtr)(clientData, len);
838 *----------------------------------------------------------------------
842 * Checks if a client address is in a list of allowed addresses
845 * TRUE if address list is empty or client address is present
846 * in the list, FALSE otherwise.
848 *----------------------------------------------------------------------
static int ClientAddrOK(struct sockaddr_in *saPtr, char *clientList)
{
    int allowed = 0;
    size_t listLen;
    char *clientListCopy, *cur, *next;

    /* An absent or empty list places no restriction on the client. */
    if(clientList == NULL || *clientList == '\0') {
        return 1;
    }

    /*
     * Work on a private copy: the scan below overwrites each ',' with
     * a terminator, and the caller's string must stay intact.
     */
    listLen = strlen(clientList);
    clientListCopy = malloc(listLen + 1);
    if(clientListCopy == NULL) {
        /* Cannot check the list; refuse rather than admit unchecked. */
        return 0;
    }
    memcpy(clientListCopy, clientList, listLen + 1);

    /* Compare the client address with each comma-separated entry. */
    for(cur = clientListCopy; cur != NULL; cur = next) {
        next = strchr(cur, ',');
        if(next != NULL) {
            *next++ = '\0';
        }
        if(inet_addr(cur) == saPtr->sin_addr.s_addr) {
            allowed = 1;
            break;
        }
    }

    free(clientListCopy);
    return allowed;
}
883 *----------------------------------------------------------------------
887 * On platforms that implement concurrent calls to accept
888 * on a shared listening ipcFd, returns 0. On other platforms,
889 * acquires an exclusive lock across all processes sharing a
890 * listening ipcFd, blocking until the lock has been acquired.
893 * 0 for successful call, -1 in case of system error (fatal).
896 * This process now has the exclusive lock.
898 *----------------------------------------------------------------------
900 static int AcquireLock(int blocking)
904 lock.l_type = F_WRLCK;
906 lock.l_whence = SEEK_SET;
909 if(fcntl(FCGI_LISTENSOCK_FILENO,
910 blocking ? F_SETLKW : F_SETLK, &lock) < 0) {
914 #endif /* USE_LOCKING */
919 *----------------------------------------------------------------------
923 * On platforms that implement concurrent calls to accept
924 * on a shared listening ipcFd, does nothing. On other platforms,
925 * releases an exclusive lock acquired by AcquireLock.
928 * 0 for successful call, -1 in case of system error (fatal).
931 * This process no longer holds the lock.
933 *----------------------------------------------------------------------
935 static int ReleaseLock(void)
939 lock.l_type = F_UNLCK;
941 lock.l_whence = SEEK_SET;
944 if(fcntl(FCGI_LISTENSOCK_FILENO, F_SETLK, &lock) < 0) {
947 #endif /* USE_LOCKING */
953 *----------------------------------------------------------------------
955 * OS_FcgiIpcAccept --
957 * Accepts a new FastCGI connection. This routine knows whether
958 * we're dealing with TCP based sockets or NT Named Pipes for IPC.
961 * -1 if the operation fails, otherwise this is a valid IPC fd.
964 * New IPC connection is accepted.
966 *----------------------------------------------------------------------
968 int OS_FcgiIpcAccept(char *clientAddrList)
972 struct sockaddr_un un;
973 struct sockaddr_in in;
977 if (AcquireLock(TRUE) < 0) {
983 socket = accept(FCGI_LISTENSOCK_FILENO,
984 (struct sockaddr *) &sa.un,
986 } while ((socket < 0) && (errno == EINTR));
990 * If the new connection uses TCP/IP, check the client IP address;
991 * if the address isn't valid, close the connection and
994 if (sa.in.sin_family == AF_INET) {
996 /* No replies to outgoing data, so disable Nagle algorithm */
998 setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
999 (char *)&set, sizeof(set));
1001 if (!ClientAddrOK(&sa.in, clientAddrList)) {
1010 /* Based on Apache's (v1.3.1) http_main.c accept() handling and
1011 * Stevens' Unix Network Programming Vol 1, 2nd Ed, para. 15.6
1015 /* EPROTO on certain older kernels really means
1016 * ECONNABORTED, so we need to ignore it for them.
1017 * See discussion in new-httpd archives nh.9701
1018 * search for EPROTO.
1020 * Also see nh.9603, search for EPROTO:
1021 * There is potentially a bug in Solaris 2.x x<6,
1022 * and other boxes that implement tcp sockets in
1023 * userland (i.e. on top of STREAMS). On these
1024 * systems, EPROTO can actually result in a fatal
1025 * loop. See PR#981 for example. It's hard to
1026 * handle both uses of EPROTO.
1033 /* Linux generates the rest of these, other tcp
1034 * stacks (i.e. bsd) tend to hide them behind
1035 * getsockopt() interfaces. They occur when
1036 * the net goes sour or the client disconnects
1037 * after the three-way handshake has been done
1038 * in the kernel but before userland has picked
1053 break; /* switch(errno) */
1056 int errnoSave = errno;
1061 } /* switch(errno) */
1064 if (ReleaseLock() < 0) {
1071 *----------------------------------------------------------------------
1075 * OS IPC routine to close an IPC connection.
1081 * IPC connection is closed.
1083 *----------------------------------------------------------------------
int OS_IpcClose(int ipcFd)
{
    /* An IPC connection is closed the same way as any other descriptor. */
    int status = OS_Close(ipcFd);

    return status;
}
1092 *----------------------------------------------------------------------
1096 * Determines whether this process is a FastCGI process or not.
1099 * Returns 1 if FastCGI, 0 if not.
1104 *----------------------------------------------------------------------
1109 struct sockaddr_in in;
1110 struct sockaddr_un un;
1112 int len = sizeof(sa);
1114 if (getpeername(FCGI_LISTENSOCK_FILENO, (struct sockaddr *)&sa, &len) != 0
1115 && errno == ENOTCONN)
1124 *----------------------------------------------------------------------
1128 * Sets selected flag bits in an open file descriptor.
1130 *----------------------------------------------------------------------
1132 void OS_SetFlags(int fd, int flags)
1135 if((val = fcntl(fd, F_GETFL, 0)) < 0) {
1139 if(fcntl(fd, F_SETFL, val) < 0) {