libfcgi/os_unix.c

   1 /*
   2  * os_unix.c --
   3  *
   4  *      Description of file.
   5  *
   6  *
   7  *  Copyright (c) 1995 Open Market, Inc.
   8  *  All rights reserved.
   9  *
  10  *  This file contains proprietary and confidential information and
  11  *  remains the unpublished property of Open Market, Inc. Use,
  12  *  disclosure, or reproduction is prohibited except as permitted by
  13  *  express written license agreement with Open Market, Inc.
  14  *
  15  *  Bill Snapper
  16  *  snapper@openmarket.com
  17  */
  18
  19 #ifndef lint
  20 static const char rcsid[] = "$Id: os_unix.c,v 1.16 2000/02/03 08:52:56 skimo Exp $";
  21 #endif /* not lint */
  22
  23 #include "fcgi_config.h"
  24
  25 #include <sys/types.h>
  26
  27 #ifdef HAVE_NETINET_IN_H
  28 #include <netinet/in.h>
  29 #endif
  30
  31 #include <arpa/inet.h>
  32 #include <assert.h>
  33 #include <errno.h>
  34 #include <fcntl.h>      /* for fcntl */
  35 #include <math.h>
  36 #include <memory.h>     /* for memchr() */
  37 #include <netinet/tcp.h>
  38 #include <stdarg.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42 #include <sys/time.h>
  43 #include <sys/un.h>
  44
  45 #ifdef HAVE_NETDB_H
  46 #include <netdb.h>
  47 #endif
  48
  49 #ifdef HAVE_SYS_SOCKET_H
  50 #include <sys/socket.h> /* for getpeername */
  51 #endif
  52
  53 #ifdef HAVE_UNISTD_H
  54 #include <unistd.h>
  55 #endif
  56
  57 #include "fastcgi.h"
  58 #include "fcgiapp.h"
  59 #include "fcgiappmisc.h"
  60 #include "fcgimisc.h"
  61 #include "fcgios.h"
  62
  63 #ifndef FALSE
  64 #define FALSE 0
  65 #endif
  66
  67 #ifndef TRUE
  68 #define TRUE 1
  69 #endif
  70
/*
 * This structure holds an entry for each outstanding async I/O operation.
 * Entries live in asyncIoTable, two per file descriptor (one for the
 * pending read, one for the pending write).
 */
typedef struct {
    OS_AsyncProc procPtr;           /* callout completion procedure */
    ClientData clientData;          /* caller private data */
    int fd;                         /* descriptor the request targets */
    int len;                        /* byte count handed to read()/write() */
    int offset;                     /* caller-supplied offset; recorded but
                                     * not consulted by OS_DoIo */
    void *buf;                      /* caller's transfer buffer */
    int inUse;                      /* non-zero while a request is queued */
} AioInfo;
  83
  84 /*
  85  * Entries in the async I/O table are allocated 2 per file descriptor.
  86  *
  87  * Read Entry Index  = fd * 2
  88  * Write Entry Index = (fd * 2) + 1
  89  */
  90 #define AIO_RD_IX(fd) (fd * 2)
  91 #define AIO_WR_IX(fd) ((fd * 2) + 1)
  92
/* Number of AioInfo entries currently allocated in asyncIoTable. */
static int asyncIoTableSize = 16;
/* The async I/O table itself; allocated by OS_LibInit. */
static AioInfo *asyncIoTable = NULL;

/* TRUE once OS_LibInit has completed successfully. */
static int libInitialized = FALSE;

/* Descriptors that have a read / write request queued for select(). */
static fd_set readFdSet;
static fd_set writeFdSet;

/*
 * Descriptors that select() reported ready but whose completion callback
 * has not been dispatched yet, together with a count of each kind.
 */
static fd_set readFdSetPost;
static int numRdPosted = 0;
static fd_set writeFdSetPost;
static int numWrPosted = 0;
/* Highest descriptor registered for async I/O; starts out at -1. */
static int volatile maxFd = -1;
 106
 107 \f
 108 /*
 109  *--------------------------------------------------------------
 110  *
 111  * OS_LibInit --
 112  *
 113  *      Set up the OS library for use.
 114  *
 115  *      NOTE: This function is really only needed for application
 116  *            asynchronous I/O.  It will most likely change in the
 117  *            future to setup the multi-threaded environment.
 118  *
 119  * Results:
 120  *      Returns 0 if success, -1 if not.
 121  *
 122  * Side effects:
 123  *      Async I/O table allocated and initialized.
 124  *
 125  *--------------------------------------------------------------
 126  */
 127 int OS_LibInit(int stdioFds[3])
 128 {
 129     if(libInitialized)
 130         return 0;
 131
 132     asyncIoTable = (AioInfo *)malloc(asyncIoTableSize * sizeof(AioInfo));
 133     if(asyncIoTable == NULL) {
 134         errno = ENOMEM;
 135         return -1;
 136     }
 137     memset((char *) asyncIoTable, 0,
 138            asyncIoTableSize * sizeof(AioInfo));
 139
 140     FD_ZERO(&readFdSet);
 141     FD_ZERO(&writeFdSet);
 142     FD_ZERO(&readFdSetPost);
 143     FD_ZERO(&writeFdSetPost);
 144     libInitialized = TRUE;
 145     return 0;
 146 }
 147
 148 \f
 149 /*
 150  *--------------------------------------------------------------
 151  *
 152  * OS_LibShutdown --
 153  *
 154  *      Shutdown the OS library.
 155  *
 156  * Results:
 157  *      None.
 158  *
 159  * Side effects:
 160  *      Memory freed, fds closed.
 161  *
 162  *--------------------------------------------------------------
 163  */
 164 void OS_LibShutdown()
 165 {
 166     if(!libInitialized)
 167         return;
 168
 169     free(asyncIoTable);
 170     asyncIoTable = NULL;
 171     libInitialized = FALSE;
 172     return;
 173 }
 174
 175 \f
 176 /*
 177  *----------------------------------------------------------------------
 178  *
 179  * OS_BuildSockAddrUn --
 180  *
 181  *      Using the pathname bindPath, fill in the sockaddr_un structure
 182  *      *servAddrPtr and the length of this structure *servAddrLen.
 183  *
 184  *      The format of the sockaddr_un structure changed incompatibly in
 185  *      4.3BSD Reno.  Digital UNIX supports both formats, other systems
 186  *      support one or the other.
 187  *
 188  * Results:
 189  *      0 for normal return, -1 for failure (bindPath too long).
 190  *
 191  *----------------------------------------------------------------------
 192  */
 193
 194 static int OS_BuildSockAddrUn(const char *bindPath,
 195                               struct sockaddr_un *servAddrPtr,
 196                               int *servAddrLen)
 197 {
 198     int bindPathLen = strlen(bindPath);
 199
 200 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 201     if(bindPathLen >= sizeof(servAddrPtr->sun_path)) {
 202         return -1;
 203     }
 204 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 205     if(bindPathLen > sizeof(servAddrPtr->sun_path)) {
 206         return -1;
 207     }
 208 #endif
 209     memset((char *) servAddrPtr, 0, sizeof(*servAddrPtr));
 210     servAddrPtr->sun_family = AF_UNIX;
 211     memcpy(servAddrPtr->sun_path, bindPath, bindPathLen);
 212 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 213     *servAddrLen = sizeof(servAddrPtr->sun_len)
 214             + sizeof(servAddrPtr->sun_family)
 215             + bindPathLen + 1;
 216     servAddrPtr->sun_len = *servAddrLen;
 217 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 218     *servAddrLen = sizeof(servAddrPtr->sun_family) + bindPathLen;
 219 #endif
 220     return 0;
 221 }
 222 \f
/*
 * Storage large enough for either address family used in this file, so a
 * single variable can be filled in as AF_UNIX or AF_INET and then passed
 * to bind()/connect().
 */
union SockAddrUnion {
    struct  sockaddr_un unixVariant;    /* local (path name) address */
    struct  sockaddr_in inetVariant;    /* TCP/IP address */
};
 227
 228 \f
 229 /*
 230  * OS_CreateLocalIpcFd --
 231  *
 232  *   This procedure is responsible for creating the listener socket
 233  *   on Unix for local process communication.  It will create a
 234  *   domain socket or a TCP/IP socket bound to "localhost" and return
 235  *   a file descriptor to it to the caller.
 236  *
 237  * Results:
 238  *      Listener socket created.  This call returns either a valid
 239  *      file descriptor or -1 on error.
 240  *
 241  * Side effects:
 242  *      None.
 243  *
 244  *----------------------------------------------------------------------
 245  */
 246 int OS_CreateLocalIpcFd(const char *bindPath, int backlog)
 247 {
 248     int listenSock, servLen;
 249     union   SockAddrUnion sa;
 250     int     tcp = FALSE;
 251     char    *tp;
 252     short   port;
 253     char    host[MAXPATHLEN];
 254
 255     strcpy(host, bindPath);
 256     if((tp = strchr(host, ':')) != 0) {
 257         *tp++ = 0;
 258         if((port = atoi(tp)) == 0) {
 259             *--tp = ':';
 260          } else {
 261             tcp = TRUE;
 262          }
 263     }
 264     if(tcp && (*host && strcmp(host, "localhost") != 0)) {
 265         fprintf(stderr, "To start a service on a TCP port can not "
 266                         "specify a host name.\n"
 267                         "You should either use \"localhost:<port>\" or "
 268                         " just use \":<port>.\"\n");
 269         exit(1);
 270     }
 271
 272     if(tcp) {
 273         listenSock = socket(AF_INET, SOCK_STREAM, 0);
 274         if(listenSock >= 0) {
 275             int flag = 1;
 276             if(setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
 277                           (char *) &flag, sizeof(flag)) < 0) {
 278                 fprintf(stderr, "Can't set SO_REUSEADDR.\n");
 279                 exit(1001);
 280             }
 281         }
 282     } else {
 283         listenSock = socket(AF_UNIX, SOCK_STREAM, 0);
 284     }
 285     if(listenSock < 0) {
 286         return -1;
 287     }
 288
 289     /*
 290      * Bind the listening socket.
 291      */
 292     if(tcp) {
 293         memset((char *) &sa.inetVariant, 0, sizeof(sa.inetVariant));
 294         sa.inetVariant.sin_family = AF_INET;
 295         sa.inetVariant.sin_addr.s_addr = htonl(INADDR_ANY);
 296         sa.inetVariant.sin_port = htons(port);
 297         servLen = sizeof(sa.inetVariant);
 298     } else {
 299         unlink(bindPath);
 300         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 301             fprintf(stderr, "Listening socket's path name is too long.\n");
 302             exit(1000);
 303         }
 304     }
 305     if(bind(listenSock, (struct sockaddr *) &sa.unixVariant, servLen) < 0
 306        || listen(listenSock, backlog) < 0) {
 307         perror("bind/listen");
 308         exit(errno);
 309     }
 310
 311     return listenSock;
 312 }
 313
 314 \f
 315 /*
 316  *----------------------------------------------------------------------
 317  *
 318  * OS_FcgiConnect --
 319  *
 320  *      Create the socket and connect to the remote application if
 321  *      possible.
 322  *
 323  *      This was lifted from the cgi-fcgi application and was abstracted
 324  *      out because Windows NT does not have a domain socket and must
 325  *      use a named pipe which has a different API altogether.
 326  *
 327  * Results:
 328  *      -1 if fail or a valid file descriptor if connection succeeds.
 329  *
 330  * Side effects:
 331  *      Remote connection established.
 332  *
 333  *----------------------------------------------------------------------
 334  */
 335 int OS_FcgiConnect(char *bindPath)
 336 {
 337     union   SockAddrUnion sa;
 338     int servLen, resultSock;
 339     int connectStatus;
 340     char    *tp;
 341     char    host[MAXPATHLEN];
 342     short   port;
 343     int     tcp = FALSE;
 344
 345     strcpy(host, bindPath);
 346     if((tp = strchr(host, ':')) != 0) {
 347         *tp++ = 0;
 348         if((port = atoi(tp)) == 0) {
 349             *--tp = ':';
 350          } else {
 351             tcp = TRUE;
 352          }
 353     }
 354     if(tcp == TRUE) {
 355         struct  hostent *hp;
 356         if((hp = gethostbyname((*host ? host : "localhost"))) == NULL) {
 357             fprintf(stderr, "Unknown host: %s\n", bindPath);
 358             exit(1000);
 359         }
 360         sa.inetVariant.sin_family = AF_INET;
 361         memcpy(&sa.inetVariant.sin_addr, hp->h_addr, hp->h_length);
 362         sa.inetVariant.sin_port = htons(port);
 363         servLen = sizeof(sa.inetVariant);
 364         resultSock = socket(AF_INET, SOCK_STREAM, 0);
 365     } else {
 366         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 367             fprintf(stderr, "Listening socket's path name is too long.\n");
 368             exit(1000);
 369         }
 370         resultSock = socket(AF_UNIX, SOCK_STREAM, 0);
 371     }
 372
 373     assert(resultSock >= 0);
 374     connectStatus = connect(resultSock, (struct sockaddr *) &sa.unixVariant,
 375                              servLen);
 376     if(connectStatus >= 0) {
 377         return resultSock;
 378     } else {
 379         /*
 380          * Most likely (errno == ENOENT || errno == ECONNREFUSED)
 381          * and no FCGI application server is running.
 382          */
 383         close(resultSock);
 384         return -1;
 385     }
 386 }
 387
 388 \f
 389 /*
 390  *--------------------------------------------------------------
 391  *
 392  * OS_Read --
 393  *
 394  *      Pass through to the unix read function.
 395  *
 396  * Results:
 397  *      Returns number of byes read, 0, or -1 failure: errno
 398  *      contains actual error.
 399  *
 400  * Side effects:
 401  *      None.
 402  *
 403  *--------------------------------------------------------------
 404  */
 405 int OS_Read(int fd, char * buf, size_t len)
 406 {
 407     return(read(fd, buf, len));
 408 }
 409 \f
 410 /*
 411  *--------------------------------------------------------------
 412  *
 413  * OS_Write --
 414  *
 415  *      Pass through to unix write function.
 416  *
 417  * Results:
 418  *      Returns number of byes read, 0, or -1 failure: errno
 419  *      contains actual error.
 420  *
 421  * Side effects:
 422  *      none.
 423  *
 424  *--------------------------------------------------------------
 425  */
 426 int OS_Write(int fd, char * buf, size_t len)
 427 {
 428     return(write(fd, buf, len));
 429 }
 430
 431 \f
 432 /*
 433  *----------------------------------------------------------------------
 434  *
 435  * OS_SpawnChild --
 436  *
 437  *      Spawns a new FastCGI listener process.
 438  *
 439  * Results:
 440  *      0 if success, -1 if error.
 441  *
 442  * Side effects:
 443  *      Child process spawned.
 444  *
 445  *----------------------------------------------------------------------
 446  */
 447 int OS_SpawnChild(char *appPath, int listenFd)
 448 {
 449     int forkResult;
 450
 451     forkResult = fork();
 452     if(forkResult < 0) {
 453         exit(errno);
 454     }
 455
 456     if(forkResult == 0) {
 457         /*
 458          * Close STDIN unconditionally.  It's used by the parent
 459          * process for CGI communication.  The FastCGI applciation
 460          * will be replacing this with the FastCGI listenFd IF
 461          * STDIN_FILENO is the same as FCGI_LISTENSOCK_FILENO
 462          * (which it is on Unix).  Regardless, STDIN, STDOUT, and
 463          * STDERR will be closed as the FastCGI process uses a
 464          * multiplexed socket in their place.
 465          */
 466         close(STDIN_FILENO);
 467
 468         /*
 469          * If the listenFd is already the value of FCGI_LISTENSOCK_FILENO
 470          * we're set.  If not, change it so the child knows where to
 471          * get the listen socket from.
 472          */
 473         if(listenFd != FCGI_LISTENSOCK_FILENO) {
 474             dup2(listenFd, FCGI_LISTENSOCK_FILENO);
 475             close(listenFd);
 476         }
 477
 478         close(STDOUT_FILENO);
 479         close(STDERR_FILENO);
 480
 481         /*
 482          * We're a child.  Exec the application.
 483          *
 484          * XXX: entire environment passes through
 485          */
 486         execl(appPath, appPath, NULL);
 487         /*
 488          * XXX: Can't do this as we've already closed STDERR!!!
 489          *
 490          * perror("exec");
 491          */
 492         exit(errno);
 493     }
 494     return 0;
 495 }
 496
 497 \f
 498 /*
 499  *--------------------------------------------------------------
 500  *
 501  * OS_AsyncReadStdin --
 502  *
 503  *      This initiates an asynchronous read on the standard
 504  *      input handle.
 505  *
 506  *      The abstraction is necessary because Windows NT does not
 507  *      have a clean way of "select"ing a file descriptor for
 508  *      I/O.
 509  *
 510  * Results:
 511  *      -1 if error, 0 otherwise.
 512  *
 513  * Side effects:
 514  *      Asynchronous bit is set in the readfd variable and
 515  *      request is enqueued.
 516  *
 517  *--------------------------------------------------------------
 518  */
 519 int OS_AsyncReadStdin(void *buf, int len, OS_AsyncProc procPtr,
 520                       ClientData clientData)
 521 {
 522     int index = AIO_RD_IX(STDIN_FILENO);
 523
 524     ASSERT(asyncIoTable[index].inUse == 0);
 525     asyncIoTable[index].procPtr = procPtr;
 526     asyncIoTable[index].clientData = clientData;
 527     asyncIoTable[index].fd = STDIN_FILENO;
 528     asyncIoTable[index].len = len;
 529     asyncIoTable[index].offset = 0;
 530     asyncIoTable[index].buf = buf;
 531     asyncIoTable[index].inUse = 1;
 532     FD_SET(STDIN_FILENO, &readFdSet);
 533     if(STDIN_FILENO > maxFd)
 534         maxFd = STDIN_FILENO;
 535     return 0;
 536 }
 537
 538 static void GrowAsyncTable(void)
 539 {
 540     int oldTableSize = asyncIoTableSize;
 541
 542     asyncIoTableSize = asyncIoTableSize * 2;
 543     asyncIoTable = (AioInfo *)realloc(asyncIoTable, asyncIoTableSize * sizeof(AioInfo));
 544     if(asyncIoTable == NULL) {
 545         errno = ENOMEM;
 546         exit(errno);
 547     }
 548     memset((char *) &asyncIoTable[oldTableSize], 0,
 549            oldTableSize * sizeof(AioInfo));
 550
 551 }
 552
 553 \f
 554 /*
 555  *--------------------------------------------------------------
 556  *
 557  * OS_AsyncRead --
 558  *
 559  *      This initiates an asynchronous read on the file
 560  *      handle which may be a socket or named pipe.
 561  *
 562  *      We also must save the ProcPtr and ClientData, so later
 563  *      when the io completes, we know who to call.
 564  *
 565  *      We don't look at any results here (the ReadFile may
 566  *      return data if it is cached) but do all completion
 567  *      processing in OS_Select when we get the io completion
 568  *      port done notifications.  Then we call the callback.
 569  *
 570  * Results:
 571  *      -1 if error, 0 otherwise.
 572  *
 573  * Side effects:
 574  *      Asynchronous I/O operation is queued for completion.
 575  *
 576  *--------------------------------------------------------------
 577  */
 578 int OS_AsyncRead(int fd, int offset, void *buf, int len,
 579                  OS_AsyncProc procPtr, ClientData clientData)
 580 {
 581     int index = AIO_RD_IX(fd);
 582
 583     ASSERT(asyncIoTable != NULL);
 584
 585     if(fd > maxFd)
 586         maxFd = fd;
 587
 588     if(index >= asyncIoTableSize) {
 589         GrowAsyncTable();
 590     }
 591
 592     ASSERT(asyncIoTable[index].inUse == 0);
 593     asyncIoTable[index].procPtr = procPtr;
 594     asyncIoTable[index].clientData = clientData;
 595     asyncIoTable[index].fd = fd;
 596     asyncIoTable[index].len = len;
 597     asyncIoTable[index].offset = offset;
 598     asyncIoTable[index].buf = buf;
 599     asyncIoTable[index].inUse = 1;
 600     FD_SET(fd, &readFdSet);
 601     return 0;
 602 }
 603 \f
 604 /*
 605  *--------------------------------------------------------------
 606  *
 607  * OS_AsyncWrite --
 608  *
 609  *      This initiates an asynchronous write on the "fake" file
 610  *      descriptor (which may be a file, socket, or named pipe).
 611  *      We also must save the ProcPtr and ClientData, so later
 612  *      when the io completes, we know who to call.
 613  *
 614  *      We don't look at any results here (the WriteFile generally
 615  *      completes immediately) but do all completion processing
 616  *      in OS_DoIo when we get the io completion port done
 617  *      notifications.  Then we call the callback.
 618  *
 619  * Results:
 620  *      -1 if error, 0 otherwise.
 621  *
 622  * Side effects:
 623  *      Asynchronous I/O operation is queued for completion.
 624  *
 625  *--------------------------------------------------------------
 626  */
 627 int OS_AsyncWrite(int fd, int offset, void *buf, int len,
 628                   OS_AsyncProc procPtr, ClientData clientData)
 629 {
 630     int index = AIO_WR_IX(fd);
 631
 632     if(fd > maxFd)
 633         maxFd = fd;
 634
 635     if(index >= asyncIoTableSize) {
 636         GrowAsyncTable();
 637     }
 638
 639     ASSERT(asyncIoTable[index].inUse == 0);
 640     asyncIoTable[index].procPtr = procPtr;
 641     asyncIoTable[index].clientData = clientData;
 642     asyncIoTable[index].fd = fd;
 643     asyncIoTable[index].len = len;
 644     asyncIoTable[index].offset = offset;
 645     asyncIoTable[index].buf = buf;
 646     asyncIoTable[index].inUse = 1;
 647     FD_SET(fd, &writeFdSet);
 648     return 0;
 649 }
 650 \f
 651 /*
 652  *--------------------------------------------------------------
 653  *
 654  * OS_Close --
 655  *
 656  *      Closes the descriptor.  This is a pass through to the
 657  *      Unix close.
 658  *
 659  * Results:
 660  *      0 for success, -1 on failure
 661  *
 662  * Side effects:
 663  *      None.
 664  *
 665  *--------------------------------------------------------------
 666  */
 667 int OS_Close(int fd)
 668 {
 669     int index = AIO_RD_IX(fd);
 670
 671     FD_CLR(fd, &readFdSet);
 672     FD_CLR(fd, &readFdSetPost);
 673     if(asyncIoTable[index].inUse != 0) {
 674         asyncIoTable[index].inUse = 0;
 675     }
 676
 677     FD_CLR(fd, &writeFdSet);
 678     FD_CLR(fd, &writeFdSetPost);
 679     index = AIO_WR_IX(fd);
 680     if(asyncIoTable[index].inUse != 0) {
 681         asyncIoTable[index].inUse = 0;
 682     }
 683     if(maxFd == fd)
 684         maxFd--;
 685     return close(fd);
 686 }
 687 \f
 688 /*
 689  *--------------------------------------------------------------
 690  *
 691  * OS_CloseRead --
 692  *
 693  *      Cancel outstanding asynchronous reads and prevent subsequent
 694  *      reads from completing.
 695  *
 696  * Results:
 697  *      Socket or file is shutdown. Return values mimic Unix shutdown:
 698  *              0 success, -1 failure
 699  *
 700  *--------------------------------------------------------------
 701  */
 702 int OS_CloseRead(int fd)
 703 {
 704     if(asyncIoTable[AIO_RD_IX(fd)].inUse != 0) {
 705         asyncIoTable[AIO_RD_IX(fd)].inUse = 0;
 706         FD_CLR(fd, &readFdSet);
 707     }
 708
 709     return shutdown(fd, 0);
 710 }
 711
 712 \f
 713 /*
 714  *--------------------------------------------------------------
 715  *
 716  * OS_DoIo --
 717  *
 718  *      This function was formerly OS_Select.  It's purpose is
 719  *      to pull I/O completion events off the queue and dispatch
 720  *      them to the appropriate place.
 721  *
 722  * Results:
 723  *      Returns 0.
 724  *
 725  * Side effects:
 726  *      Handlers are called.
 727  *
 728  *--------------------------------------------------------------
 729  */
 730 int OS_DoIo(struct timeval *tmo)
 731 {
 732     int fd, len, selectStatus;
 733     OS_AsyncProc procPtr;
 734     ClientData clientData;
 735     AioInfo *aioPtr;
 736     fd_set readFdSetCpy;
 737     fd_set writeFdSetCpy;
 738
 739     FD_ZERO(&readFdSetCpy);
 740     FD_ZERO(&writeFdSetCpy);
 741
 742     for(fd = 0; fd <= maxFd; fd++) {
 743         if(FD_ISSET(fd, &readFdSet)) {
 744             FD_SET(fd, &readFdSetCpy);
 745         }
 746         if(FD_ISSET(fd, &writeFdSet)) {
 747             FD_SET(fd, &writeFdSetCpy);
 748         }
 749     }
 750
 751     /*
 752      * If there were no completed events from a prior call, see if there's
 753      * any work to do.
 754      */
 755     if(numRdPosted == 0 && numWrPosted == 0) {
 756         selectStatus = select((maxFd+1), &readFdSetCpy, &writeFdSetCpy,
 757                               NULL, tmo);
 758         if(selectStatus < 0) {
 759             exit(errno);
 760         }
 761
 762         for(fd = 0; fd <= maxFd; fd++) {
 763             /*
 764              * Build up a list of completed events.  We'll work off of
 765              * this list as opposed to looping through the read and write
 766              * fd sets since they can be affected by a callbacl routine.
 767              */
 768             if(FD_ISSET(fd, &readFdSetCpy)) {
 769                 numRdPosted++;
 770                 FD_SET(fd, &readFdSetPost);
 771                 FD_CLR(fd, &readFdSet);
 772             }
 773
 774             if(FD_ISSET(fd, &writeFdSetCpy)) {
 775                 numWrPosted++;
 776                 FD_SET(fd, &writeFdSetPost);
 777                 FD_CLR(fd, &writeFdSet);
 778             }
 779         }
 780     }
 781
 782     if(numRdPosted == 0 && numWrPosted == 0)
 783         return 0;
 784
 785     for(fd = 0; fd <= maxFd; fd++) {
 786         /*
 787          * Do reads and dispatch callback.
 788          */
 789         if(FD_ISSET(fd, &readFdSetPost)
 790            && asyncIoTable[AIO_RD_IX(fd)].inUse) {
 791
 792             numRdPosted--;
 793             FD_CLR(fd, &readFdSetPost);
 794             aioPtr = &asyncIoTable[AIO_RD_IX(fd)];
 795
 796             len = read(aioPtr->fd, aioPtr->buf, aioPtr->len);
 797
 798             procPtr = aioPtr->procPtr;
 799             aioPtr->procPtr = NULL;
 800             clientData = aioPtr->clientData;
 801             aioPtr->inUse = 0;
 802
 803             (*procPtr)(clientData, len);
 804         }
 805
 806         /*
 807          * Do writes and dispatch callback.
 808          */
 809         if(FD_ISSET(fd, &writeFdSetPost) &&
 810            asyncIoTable[AIO_WR_IX(fd)].inUse) {
 811
 812             numWrPosted--;
 813             FD_CLR(fd, &writeFdSetPost);
 814             aioPtr = &asyncIoTable[AIO_WR_IX(fd)];
 815
 816             len = write(aioPtr->fd, aioPtr->buf, aioPtr->len);
 817
 818             procPtr = aioPtr->procPtr;
 819             aioPtr->procPtr = NULL;
 820             clientData = aioPtr->clientData;
 821             aioPtr->inUse = 0;
 822             (*procPtr)(clientData, len);
 823         }
 824     }
 825     return 0;
 826 }
 827
 828 \f
 829 /*
 830  *----------------------------------------------------------------------
 831  *
 832  * ClientAddrOK --
 833  *
 834  *      Checks if a client address is in a list of allowed addresses
 835  *
 836  * Results:
 837  *      TRUE if address list is empty or client address is present
 838  *      in the list, FALSE otherwise.
 839  *
 840  *----------------------------------------------------------------------
 841  */
 842 static int ClientAddrOK(struct sockaddr_in *saPtr, const char *clientList)
 843 {
 844     int result = FALSE;
 845     char *clientListCopy, *cur, *next;
 846     char *newString = NULL;
 847     int strLen;
 848
 849     if(clientList == NULL || *clientList == '\0') {
 850         return TRUE;
 851     }
 852
 853     strLen = strlen(clientList);
 854     clientListCopy = (char *)malloc(strLen + 1);
 855     assert(newString != NULL);
 856     memcpy(newString, clientList, strLen);
 857     newString[strLen] = '\000';
 858
 859     for(cur = clientListCopy; cur != NULL; cur = next) {
 860         next = strchr(cur, ',');
 861         if(next != NULL) {
 862             *next++ = '\0';
 863         }
 864         if(inet_addr(cur) == saPtr->sin_addr.s_addr) {
 865             result = TRUE;
 866             break;
 867         }
 868     }
 869     free(clientListCopy);
 870     return result;
 871 }
 872
 873 \f
 874 /*
 875  *----------------------------------------------------------------------
 876  *
 877  * AcquireLock --
 878  *
 879  *      On platforms that implement concurrent calls to accept
 880  *      on a shared listening ipcFd, returns 0.  On other platforms,
 881  *      acquires an exclusive lock across all processes sharing a
 882  *      listening ipcFd, blocking until the lock has been acquired.
 883  *
 884  * Results:
 885  *      0 for successful call, -1 in case of system error (fatal).
 886  *
 887  * Side effects:
 888  *      This process now has the exclusive lock.
 889  *
 890  *----------------------------------------------------------------------
 891  */
 892 static int AcquireLock(int sock, int fail_on_intr)
 893 {
 894 #ifdef USE_LOCKING
 895     do {
 896         struct flock lock;
 897         lock.l_type = F_WRLCK;
 898         lock.l_start = 0;
 899         lock.l_whence = SEEK_SET;
 900         lock.l_len = 0;
 901
 902         if (fcntl(sock, F_SETLKW, &lock) != -1)
 903             return 0;
 904     } while (errno == EINTR && !fail_on_intr);
 905
 906     return -1;
 907
 908 #else
 909     return 0;
 910 #endif
 911 }
 912 \f
 913 /*
 914  *----------------------------------------------------------------------
 915  *
 916  * ReleaseLock --
 917  *
 918  *      On platforms that implement concurrent calls to accept
 919  *      on a shared listening ipcFd, does nothing.  On other platforms,
 920  *      releases an exclusive lock acquired by AcquireLock.
 921  *
 922  * Results:
 923  *      0 for successful call, -1 in case of system error (fatal).
 924  *
 925  * Side effects:
 926  *      This process no longer holds the lock.
 927  *
 928  *----------------------------------------------------------------------
 929  */
 930 static int ReleaseLock(int sock)
 931 {
 932 #ifdef USE_LOCKING
 933     do {
 934         struct flock lock;
 935         lock.l_type = F_UNLCK;
 936         lock.l_start = 0;
 937         lock.l_whence = SEEK_SET;
 938         lock.l_len = 0;
 939
 940         if (fcntl(sock, F_SETLK, &lock) != -1)
 941             return 0;
 942     } while (errno == EINTR);
 943
 944     return -1;
 945
 946 #else
 947     return 0;
 948 #endif
 949 }
 950
 951 \f
 952 /**********************************************************************
 953  * Determine if the errno resulting from a failed accept() warrants a
 954  * retry or exit().  Based on Apache's http_main.c accept() handling
 955  * and Stevens' Unix Network Programming Vol 1, 2nd Ed, para. 15.6.
 956  */
 957 static int is_reasonable_accept_errno (const int error)
 958 {
 959     switch (error) {
 960 #ifdef EPROTO
 961         /* EPROTO on certain older kernels really means ECONNABORTED, so
 962          * we need to ignore it for them.  See discussion in new-httpd
 963          * archives nh.9701 search for EPROTO.  Also see nh.9603, search
 964          * for EPROTO:  There is potentially a bug in Solaris 2.x x<6, and
 965          * other boxes that implement tcp sockets in userland (i.e. on top of
 966          * STREAMS).  On these systems, EPROTO can actually result in a fatal
 967          * loop.  See PR#981 for example.  It's hard to handle both uses of
 968          * EPROTO. */
 969         case EPROTO:
 970 #endif
 971 #ifdef ECONNABORTED
 972         case ECONNABORTED:
 973 #endif
 974         /* Linux generates the rest of these, other tcp stacks (i.e.
 975          * bsd) tend to hide them behind getsockopt() interfaces.  They
 976          * occur when the net goes sour or the client disconnects after the
 977          * three-way handshake has been done in the kernel but before
 978          * userland has picked up the socket. */
 979 #ifdef ECONNRESET
 980         case ECONNRESET:
 981 #endif
 982 #ifdef ETIMEDOUT
 983         case ETIMEDOUT:
 984 #endif
 985 #ifdef EHOSTUNREACH
 986         case EHOSTUNREACH:
 987 #endif
 988 #ifdef ENETUNREACH
 989         case ENETUNREACH:
 990 #endif
 991             return 1;
 992
 993         default:
 994             return 0;
 995     }
 996 }
 997
 998 /**********************************************************************
 999  * This works around a problem on Linux 2.0.x and SCO Unixware (maybe
1000  * others?).  When a connect() is made to a Unix Domain socket, but its
1001  * not accept()ed before the web server gets impatient and close()s, an
1002  * accept() results in a valid file descriptor, but no data to read.
1003  * This causes a block on the first read() - which never returns!
1004  *
1005  * Another approach to this is to write() to the socket to provoke a
1006  * SIGPIPE, but this is a pain because of the FastCGI protocol, the fact
1007  * that whatever is written has to be universally ignored by all FastCGI
1008  * web servers, and a SIGPIPE handler has to be installed which returns
1009  * (or SIGPIPE is ignored).
1010  *
1011  * READABLE_UNIX_FD_DROP_DEAD_TIMEVAL = 2,0 by default.
1012  *
1013  * Making it shorter is probably safe, but I'll leave that to you.  Making
1014  * it 0,0 doesn't work reliably.  The shorter you can reliably make it,
1015  * the faster your application will be able to recover (waiting 2 seconds
1016  * may _cause_ the problem when there is a very high demand). At any rate,
1017  * this is better than perma-blocking.
1018  */
1019 static int is_af_unix_keeper(const int fd)
1020 {
1021     struct timeval tval = { READABLE_UNIX_FD_DROP_DEAD_TIMEVAL };
1022     fd_set read_fds;
1023
1024     FD_ZERO(&read_fds);
1025     FD_SET(fd, &read_fds);
1026
1027     return select(fd + 1, &read_fds, NULL, NULL, &tval) >= 0 && FD_ISSET(fd, &read_fds);
1028 }
1029
/*
 *----------------------------------------------------------------------
 *
 * OS_Accept --
 *
 *      Accepts a new FastCGI connection on listen_sock.  The accept()
 *      call is bracketed by AcquireLock()/ReleaseLock() on the
 *      listening socket.
 *
 *      accept() is retried when it is interrupted (EINTR) unless
 *      fail_on_intr is non-zero, and after any failure for which
 *      is_reasonable_accept_errno() returns non-zero.
 *
 *      A connection whose address family is AF_INET gets TCP_NODELAY
 *      set (where available) and is only kept if ClientAddrOK()
 *      approves the peer address against webServerAddrs.  A connection
 *      whose address family is AF_UNIX is only kept if
 *      is_af_unix_keeper() accepts it.  Rejected connections are
 *      closed and the routine goes back to accepting.
 *
 * Results:
 *      -1 if the operation fails, otherwise this is a valid IPC fd.
 *
 * Side effects:
 *      New IPC connection is accepted.
 *
 *----------------------------------------------------------------------
 */
int OS_Accept(int listen_sock, int fail_on_intr, const char *webServerAddrs)
{
    int conn;
    union {
        struct sockaddr_un un;
        struct sockaddr_in in;
    } sa;

    for (;;) {
        if (AcquireLock(listen_sock, fail_on_intr))
            return -1;

        for (;;) {
            do {
#ifdef HAVE_SOCKLEN
                socklen_t len = sizeof(sa);
#else
                int len = sizeof(sa);
#endif
                /*
                 * accept() is not required to store an address for an
                 * unbound peer; start from a zeroed address so that the
                 * family tests below never read indeterminate memory.
                 */
                memset(&sa, 0, sizeof(sa));
                conn = accept(listen_sock, (struct sockaddr *)&sa, &len);
            } while (conn < 0 && errno == EINTR && !fail_on_intr);

            if (conn < 0) {
                if (!is_reasonable_accept_errno(errno)) {
                    /* Report accept()'s errno, not ReleaseLock()'s */
                    int errnoSave = errno;
                    ReleaseLock(listen_sock);
                    errno = errnoSave;
                    return (-1);
                }
                errno = 0;
            }
            else {  /* conn >= 0 */
#ifdef TCP_NODELAY
                int set = 1;
#endif

                /* Only AF_INET peers are subject to the address check */
                if (sa.in.sin_family != AF_INET)
                    break;

#ifdef TCP_NODELAY
                /* No replies to outgoing data, so disable Nagle */
                setsockopt(conn, IPPROTO_TCP, TCP_NODELAY, (char *)&set, sizeof(set));
#endif

                /* Check that the client IP address is approved */
                if (ClientAddrOK(&sa.in, webServerAddrs))
                    break;

                close(conn);
            }  /* conn >= 0 */
        }  /* for(;;) */

        if (ReleaseLock(listen_sock)) {
            /*
             * The caller only sees -1 and cannot close the connection
             * that was already accepted, so close it here instead of
             * leaking the descriptor.
             */
            int errnoSave = errno;
            close(conn);
            errno = errnoSave;
            return (-1);
        }

        if (sa.in.sin_family != AF_UNIX || is_af_unix_keeper(conn))
            break;

        close(conn);
    }  /* while(1) - lock */

    return (conn);
}
1107 \f
/*
 *----------------------------------------------------------------------
 *
 * OS_IpcClose --
 *
 *      Closes an IPC connection by handing the descriptor to
 *      OS_Close().
 *
 * Results:
 *      Whatever OS_Close() returns for ipcFd.
 *
 * Side effects:
 *      IPC connection is closed.
 *
 *----------------------------------------------------------------------
 */
int OS_IpcClose(int ipcFd)
{
    const int status = OS_Close(ipcFd);

    return status;
}
1127
1128 \f
1129 /*
1130  *----------------------------------------------------------------------
1131  *
1132  * OS_IsFcgi --
1133  *
1134  *      Determines whether this process is a FastCGI process or not.
1135  *
1136  * Results:
1137  *      Returns 1 if FastCGI, 0 if not.
1138  *
1139  * Side effects:
1140  *      None.
1141  *
1142  *----------------------------------------------------------------------
1143  */
1144 int OS_IsFcgi(int sock)
1145 {
1146         union {
1147         struct sockaddr_in in;
1148         struct sockaddr_un un;
1149     } sa;
1150 #ifdef HAVE_SOCKLEN
1151     socklen_t len = sizeof(sa);
1152 #else
1153     int len = sizeof(sa);
1154 #endif
1155
1156     if (getpeername(sock, (struct sockaddr *)&sa, &len) != 0 && errno == ENOTCONN) {
1157         return TRUE;
1158     }
1159     else {
1160         return FALSE;
1161     }
1162 }
1163 \f
/*
 *----------------------------------------------------------------------
 *
 * OS_SetFlags --
 *
 *      Turns on the given file status flag bits of an open file
 *      descriptor, leaving the bits that are already set untouched.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      The descriptor's status flags are updated.  If either fcntl()
 *      call fails, the process exits with errno as its exit status.
 *
 *----------------------------------------------------------------------
 */
void OS_SetFlags(int fd, int flags)
{
    /* Fetch the current flags so that only the requested bits are added */
    int current = fcntl(fd, F_GETFL, 0);

    if (current < 0) {
        exit(errno);
    }

    if (fcntl(fd, F_SETFL, current | flags) < 0) {
        exit(errno);
    }
}