libfcgi/os_unix.c

   1 /*
   2  * os_unix.c --
   3  *
   4  *      Description of file.
   5  *
   6  *
   7  *  Copyright (c) 1995 Open Market, Inc.
   8  *  All rights reserved.
   9  *
  10  *  This file contains proprietary and confidential information and
  11  *  remains the unpublished property of Open Market, Inc. Use,
  12  *  disclosure, or reproduction is prohibited except as permitted by
  13  *  express written license agreement with Open Market, Inc.
  14  *
  15  *  Bill Snapper
  16  *  snapper@openmarket.com
  17  */
  18
  19 #ifndef lint
  20 static const char rcsid[] = "$Id: os_unix.c,v 1.31 2001/09/06 20:07:53 robs Exp $";
  21 #endif /* not lint */
  22
  23 #include "fcgi_config.h"
  24
  25 #include <sys/types.h>
  26
  27 #ifdef HAVE_NETINET_IN_H
  28 #include <netinet/in.h>
  29 #endif
  30
  31 #include <arpa/inet.h>
  32 #include <assert.h>
  33 #include <errno.h>
  34 #include <fcntl.h>      /* for fcntl */
  35 #include <math.h>
  36 #include <memory.h>     /* for memchr() */
  37 #include <netinet/tcp.h>
  38 #include <stdarg.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42 #include <sys/time.h>
  43 #include <sys/un.h>
  44 #include <signal.h>
  45
  46 #ifdef HAVE_NETDB_H
  47 #include <netdb.h>
  48 #endif
  49
  50 #ifdef HAVE_SYS_SOCKET_H
  51 #include <sys/socket.h> /* for getpeername */
  52 #endif
  53
  54 #ifdef HAVE_UNISTD_H
  55 #include <unistd.h>
  56 #endif
  57
  58 #include "fastcgi.h"
  59 #include "fcgimisc.h"
  60 #include "fcgios.h"
  61
  62 #ifndef INADDR_NONE
  63 #define INADDR_NONE ((unsigned long) -1)
  64 #endif
  65
  66 /*
  67  * This structure holds an entry for each oustanding async I/O operation.
  68  */
  69 typedef struct {
  70     OS_AsyncProc procPtr;           /* callout completion procedure */
  71     ClientData clientData;          /* caller private data */
  72     int fd;
  73     int len;
  74     int offset;
  75     void *buf;
  76     int inUse;
  77 } AioInfo;
  78
  79 /*
  80  * Entries in the async I/O table are allocated 2 per file descriptor.
  81  *
  82  * Read Entry Index  = fd * 2
  83  * Write Entry Index = (fd * 2) + 1
  84  */
  85 #define AIO_RD_IX(fd) (fd * 2)
  86 #define AIO_WR_IX(fd) ((fd * 2) + 1)
  87
  88 static int asyncIoInUse = FALSE;
  89 static int asyncIoTableSize = 16;
  90 static AioInfo *asyncIoTable = NULL;
  91
  92 static int libInitialized = FALSE;
  93
  94 static fd_set readFdSet;
  95 static fd_set writeFdSet;
  96
  97 static fd_set readFdSetPost;
  98 static int numRdPosted = 0;
  99 static fd_set writeFdSetPost;
 100 static int numWrPosted = 0;
 101 static int volatile maxFd = -1;
 102
 103 static int shutdownPending = FALSE;
 104 static int shutdownNow = FALSE;
 105
 106 void OS_Shutdown()
 107 {
 108     shutdownNow = TRUE;
 109     OS_ShutdownPending();
 110 }
 111
 112 void OS_ShutdownPending()
 113 {
 114     shutdownPending = TRUE;
 115 }
 116
 117 static void OS_Sigusr1Handler(int signo)
 118 {
 119     OS_ShutdownPending();
 120 }
 121
 122 static void OS_SigpipeHandler(int signo)
 123 {
 124     ;
 125 }
 126
 127 static void installSignalHandler(int signo, const struct sigaction * act, int force)
 128 {
 129     struct sigaction sa;
 130
 131     sigaction(signo, NULL, &sa);
 132
 133     if (force || sa.sa_handler == SIG_DFL)
 134     {
 135         sigaction(signo, act, NULL);
 136     }
 137 }
 138
 139 static void OS_InstallSignalHandlers(int force)
 140 {
 141     struct sigaction sa;
 142
 143     sigemptyset(&sa.sa_mask);
 144     sa.sa_flags = 0;
 145
 146     sa.sa_handler = OS_SigpipeHandler;
 147     installSignalHandler(SIGPIPE, &sa, force);
 148
 149     sa.sa_handler = OS_Sigusr1Handler;
 150     installSignalHandler(SIGUSR1, &sa, force);
 151 }
 152
 153 /*
 154  *--------------------------------------------------------------
 155  *
 156  * OS_LibInit --
 157  *
 158  *      Set up the OS library for use.
 159  *
 160  *      NOTE: This function is really only needed for application
 161  *            asynchronous I/O.  It will most likely change in the
 162  *            future to setup the multi-threaded environment.
 163  *
 164  * Results:
 165  *      Returns 0 if success, -1 if not.
 166  *
 167  * Side effects:
 168  *      Async I/O table allocated and initialized.
 169  *
 170  *--------------------------------------------------------------
 171  */
 172 int OS_LibInit(int stdioFds[3])
 173 {
 174     if(libInitialized)
 175         return 0;
 176
 177     asyncIoTable = (AioInfo *)malloc(asyncIoTableSize * sizeof(AioInfo));
 178     if(asyncIoTable == NULL) {
 179         errno = ENOMEM;
 180         return -1;
 181     }
 182     memset((char *) asyncIoTable, 0,
 183            asyncIoTableSize * sizeof(AioInfo));
 184
 185     FD_ZERO(&readFdSet);
 186     FD_ZERO(&writeFdSet);
 187     FD_ZERO(&readFdSetPost);
 188     FD_ZERO(&writeFdSetPost);
 189
 190     OS_InstallSignalHandlers(FALSE);
 191
 192     libInitialized = TRUE;
 193
 194     return 0;
 195 }
 196
 197 /*
 198  *--------------------------------------------------------------
 199  *
 200  * OS_LibShutdown --
 201  *
 202  *      Shutdown the OS library.
 203  *
 204  * Results:
 205  *      None.
 206  *
 207  * Side effects:
 208  *      Memory freed, fds closed.
 209  *
 210  *--------------------------------------------------------------
 211  */
 212 void OS_LibShutdown()
 213 {
 214     OS_Shutdown();
 215
 216     if(!libInitialized)
 217         return;
 218
 219     free(asyncIoTable);
 220     asyncIoTable = NULL;
 221     libInitialized = FALSE;
 222     return;
 223 }
 224
 225 /*
 226  *----------------------------------------------------------------------
 227  *
 228  * OS_BuildSockAddrUn --
 229  *
 230  *      Using the pathname bindPath, fill in the sockaddr_un structure
 231  *      *servAddrPtr and the length of this structure *servAddrLen.
 232  *
 233  *      The format of the sockaddr_un structure changed incompatibly in
 234  *      4.3BSD Reno.  Digital UNIX supports both formats, other systems
 235  *      support one or the other.
 236  *
 237  * Results:
 238  *      0 for normal return, -1 for failure (bindPath too long).
 239  *
 240  *----------------------------------------------------------------------
 241  */
 242
 243 static int OS_BuildSockAddrUn(const char *bindPath,
 244                               struct sockaddr_un *servAddrPtr,
 245                               int *servAddrLen)
 246 {
 247     int bindPathLen = strlen(bindPath);
 248
 249 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 250     if(bindPathLen >= sizeof(servAddrPtr->sun_path)) {
 251         return -1;
 252     }
 253 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 254     if(bindPathLen > sizeof(servAddrPtr->sun_path)) {
 255         return -1;
 256     }
 257 #endif
 258     memset((char *) servAddrPtr, 0, sizeof(*servAddrPtr));
 259     servAddrPtr->sun_family = AF_UNIX;
 260     memcpy(servAddrPtr->sun_path, bindPath, bindPathLen);
 261 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 262     *servAddrLen = sizeof(servAddrPtr->sun_len)
 263             + sizeof(servAddrPtr->sun_family)
 264             + bindPathLen + 1;
 265     servAddrPtr->sun_len = *servAddrLen;
 266 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 267     *servAddrLen = sizeof(servAddrPtr->sun_family) + bindPathLen;
 268 #endif
 269     return 0;
 270 }
 271 union SockAddrUnion {
 272     struct  sockaddr_un unixVariant;
 273     struct  sockaddr_in inetVariant;
 274 };
 275
 276 /*
 277  * OS_CreateLocalIpcFd --
 278  *
 279  *   This procedure is responsible for creating the listener socket
 280  *   on Unix for local process communication.  It will create a
 281  *   domain socket or a TCP/IP socket bound to "localhost" and return
 282  *   a file descriptor to it to the caller.
 283  *
 284  * Results:
 285  *      Listener socket created.  This call returns either a valid
 286  *      file descriptor or -1 on error.
 287  *
 288  * Side effects:
 289  *      None.
 290  *
 291  *----------------------------------------------------------------------
 292  */
 293 int OS_CreateLocalIpcFd(const char *bindPath, int backlog)
 294 {
 295     int listenSock, servLen;
 296     union   SockAddrUnion sa;
 297     int     tcp = FALSE;
 298     unsigned long tcp_ia;
 299     char    *tp;
 300     short   port;
 301     char    host[MAXPATHLEN];
 302
 303     strcpy(host, bindPath);
 304     if((tp = strchr(host, ':')) != 0) {
 305         *tp++ = 0;
 306         if((port = atoi(tp)) == 0) {
 307             *--tp = ':';
 308          } else {
 309             tcp = TRUE;
 310          }
 311     }
 312     if(tcp) {
 313       if (!*host || !strcmp(host,"*")) {
 314         tcp_ia = htonl(INADDR_ANY);
 315       } else {
 316         tcp_ia = inet_addr(host);
 317         if (tcp_ia == INADDR_NONE) {
 318           struct hostent * hep;
 319           hep = gethostbyname(host);
 320           if ((!hep) || (hep->h_addrtype != AF_INET || !hep->h_addr_list[0])) {
 321             fprintf(stderr, "Cannot resolve host name %s -- exiting!\n", host);
 322             exit(1);
 323           }
 324           if (hep->h_addr_list[1]) {
 325             fprintf(stderr, "Host %s has multiple addresses ---\n", host);
 326             fprintf(stderr, "you must choose one explicitly!!!\n");
 327             exit(1);
 328           }
 329           tcp_ia = ((struct in_addr *) (hep->h_addr))->s_addr;
 330         }
 331       }
 332     }
 333
 334     if(tcp) {
 335         listenSock = socket(AF_INET, SOCK_STREAM, 0);
 336         if(listenSock >= 0) {
 337             int flag = 1;
 338             if(setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
 339                           (char *) &flag, sizeof(flag)) < 0) {
 340                 fprintf(stderr, "Can't set SO_REUSEADDR.\n");
 341                 exit(1001);
 342             }
 343         }
 344     } else {
 345         listenSock = socket(AF_UNIX, SOCK_STREAM, 0);
 346     }
 347     if(listenSock < 0) {
 348         return -1;
 349     }
 350
 351     /*
 352      * Bind the listening socket.
 353      */
 354     if(tcp) {
 355         memset((char *) &sa.inetVariant, 0, sizeof(sa.inetVariant));
 356         sa.inetVariant.sin_family = AF_INET;
 357         sa.inetVariant.sin_addr.s_addr = tcp_ia;
 358         sa.inetVariant.sin_port = htons(port);
 359         servLen = sizeof(sa.inetVariant);
 360     } else {
 361         unlink(bindPath);
 362         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 363             fprintf(stderr, "Listening socket's path name is too long.\n");
 364             exit(1000);
 365         }
 366     }
 367     if(bind(listenSock, (struct sockaddr *) &sa.unixVariant, servLen) < 0
 368        || listen(listenSock, backlog) < 0) {
 369         perror("bind/listen");
 370         exit(errno);
 371     }
 372
 373     return listenSock;
 374 }
 375
 376 /*
 377  *----------------------------------------------------------------------
 378  *
 379  * OS_FcgiConnect --
 380  *
 381  *      Create the socket and connect to the remote application if
 382  *      possible.
 383  *
 384  *      This was lifted from the cgi-fcgi application and was abstracted
 385  *      out because Windows NT does not have a domain socket and must
 386  *      use a named pipe which has a different API altogether.
 387  *
 388  * Results:
 389  *      -1 if fail or a valid file descriptor if connection succeeds.
 390  *
 391  * Side effects:
 392  *      Remote connection established.
 393  *
 394  *----------------------------------------------------------------------
 395  */
 396 int OS_FcgiConnect(char *bindPath)
 397 {
 398     union   SockAddrUnion sa;
 399     int servLen, resultSock;
 400     int connectStatus;
 401     char    *tp;
 402     char    host[MAXPATHLEN];
 403     short   port;
 404     int     tcp = FALSE;
 405
 406     strcpy(host, bindPath);
 407     if((tp = strchr(host, ':')) != 0) {
 408         *tp++ = 0;
 409         if((port = atoi(tp)) == 0) {
 410             *--tp = ':';
 411          } else {
 412             tcp = TRUE;
 413          }
 414     }
 415     if(tcp == TRUE) {
 416         struct  hostent *hp;
 417         if((hp = gethostbyname((*host ? host : "localhost"))) == NULL) {
 418             fprintf(stderr, "Unknown host: %s\n", bindPath);
 419             exit(1000);
 420         }
 421         sa.inetVariant.sin_family = AF_INET;
 422         memcpy(&sa.inetVariant.sin_addr, hp->h_addr, hp->h_length);
 423         sa.inetVariant.sin_port = htons(port);
 424         servLen = sizeof(sa.inetVariant);
 425         resultSock = socket(AF_INET, SOCK_STREAM, 0);
 426     } else {
 427         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 428             fprintf(stderr, "Listening socket's path name is too long.\n");
 429             exit(1000);
 430         }
 431         resultSock = socket(AF_UNIX, SOCK_STREAM, 0);
 432     }
 433
 434     ASSERT(resultSock >= 0);
 435     connectStatus = connect(resultSock, (struct sockaddr *) &sa.unixVariant,
 436                              servLen);
 437     if(connectStatus >= 0) {
 438         return resultSock;
 439     } else {
 440         /*
 441          * Most likely (errno == ENOENT || errno == ECONNREFUSED)
 442          * and no FCGI application server is running.
 443          */
 444         close(resultSock);
 445         return -1;
 446     }
 447 }
 448
 449 /*
 450  *--------------------------------------------------------------
 451  *
 452  * OS_Read --
 453  *
 454  *      Pass through to the unix read function.
 455  *
 456  * Results:
 457  *      Returns number of byes read, 0, or -1 failure: errno
 458  *      contains actual error.
 459  *
 460  * Side effects:
 461  *      None.
 462  *
 463  *--------------------------------------------------------------
 464  */
 465 int OS_Read(int fd, char * buf, size_t len)
 466 {
 467     if (shutdownNow) return -1;
 468     return(read(fd, buf, len));
 469 }
 470
 471 /*
 472  *--------------------------------------------------------------
 473  *
 474  * OS_Write --
 475  *
 476  *      Pass through to unix write function.
 477  *
 478  * Results:
 479  *      Returns number of byes read, 0, or -1 failure: errno
 480  *      contains actual error.
 481  *
 482  * Side effects:
 483  *      none.
 484  *
 485  *--------------------------------------------------------------
 486  */
 487 int OS_Write(int fd, char * buf, size_t len)
 488 {
 489     if (shutdownNow) return -1;
 490     return(write(fd, buf, len));
 491 }
 492
 493 /*
 494  *----------------------------------------------------------------------
 495  *
 496  * OS_SpawnChild --
 497  *
 498  *      Spawns a new FastCGI listener process.
 499  *
 500  * Results:
 501  *      0 if success, -1 if error.
 502  *
 503  * Side effects:
 504  *      Child process spawned.
 505  *
 506  *----------------------------------------------------------------------
 507  */
 508 int OS_SpawnChild(char *appPath, int listenFd)
 509 {
 510     int forkResult;
 511
 512     forkResult = fork();
 513     if(forkResult < 0) {
 514         exit(errno);
 515     }
 516
 517     if(forkResult == 0) {
 518         /*
 519          * Close STDIN unconditionally.  It's used by the parent
 520          * process for CGI communication.  The FastCGI applciation
 521          * will be replacing this with the FastCGI listenFd IF
 522          * STDIN_FILENO is the same as FCGI_LISTENSOCK_FILENO
 523          * (which it is on Unix).  Regardless, STDIN, STDOUT, and
 524          * STDERR will be closed as the FastCGI process uses a
 525          * multiplexed socket in their place.
 526          */
 527         close(STDIN_FILENO);
 528
 529         /*
 530          * If the listenFd is already the value of FCGI_LISTENSOCK_FILENO
 531          * we're set.  If not, change it so the child knows where to
 532          * get the listen socket from.
 533          */
 534         if(listenFd != FCGI_LISTENSOCK_FILENO) {
 535             dup2(listenFd, FCGI_LISTENSOCK_FILENO);
 536             close(listenFd);
 537         }
 538
 539         close(STDOUT_FILENO);
 540         close(STDERR_FILENO);
 541
 542         /*
 543          * We're a child.  Exec the application.
 544          *
 545          * XXX: entire environment passes through
 546          */
 547         execl(appPath, appPath, NULL);
 548         /*
 549          * XXX: Can't do this as we've already closed STDERR!!!
 550          *
 551          * perror("exec");
 552          */
 553         exit(errno);
 554     }
 555     return 0;
 556 }
 557
 558 /*
 559  *--------------------------------------------------------------
 560  *
 561  * OS_AsyncReadStdin --
 562  *
 563  *      This initiates an asynchronous read on the standard
 564  *      input handle.
 565  *
 566  *      The abstraction is necessary because Windows NT does not
 567  *      have a clean way of "select"ing a file descriptor for
 568  *      I/O.
 569  *
 570  * Results:
 571  *      -1 if error, 0 otherwise.
 572  *
 573  * Side effects:
 574  *      Asynchronous bit is set in the readfd variable and
 575  *      request is enqueued.
 576  *
 577  *--------------------------------------------------------------
 578  */
 579 int OS_AsyncReadStdin(void *buf, int len, OS_AsyncProc procPtr,
 580                       ClientData clientData)
 581 {
 582     int index = AIO_RD_IX(STDIN_FILENO);
 583
 584     asyncIoInUse = TRUE;
 585     ASSERT(asyncIoTable[index].inUse == 0);
 586     asyncIoTable[index].procPtr = procPtr;
 587     asyncIoTable[index].clientData = clientData;
 588     asyncIoTable[index].fd = STDIN_FILENO;
 589     asyncIoTable[index].len = len;
 590     asyncIoTable[index].offset = 0;
 591     asyncIoTable[index].buf = buf;
 592     asyncIoTable[index].inUse = 1;
 593     FD_SET(STDIN_FILENO, &readFdSet);
 594     if(STDIN_FILENO > maxFd)
 595         maxFd = STDIN_FILENO;
 596     return 0;
 597 }
 598
 599 static void GrowAsyncTable(void)
 600 {
 601     int oldTableSize = asyncIoTableSize;
 602
 603     asyncIoTableSize = asyncIoTableSize * 2;
 604     asyncIoTable = (AioInfo *)realloc(asyncIoTable, asyncIoTableSize * sizeof(AioInfo));
 605     if(asyncIoTable == NULL) {
 606         errno = ENOMEM;
 607         exit(errno);
 608     }
 609     memset((char *) &asyncIoTable[oldTableSize], 0,
 610            oldTableSize * sizeof(AioInfo));
 611
 612 }
 613
 614 /*
 615  *--------------------------------------------------------------
 616  *
 617  * OS_AsyncRead --
 618  *
 619  *      This initiates an asynchronous read on the file
 620  *      handle which may be a socket or named pipe.
 621  *
 622  *      We also must save the ProcPtr and ClientData, so later
 623  *      when the io completes, we know who to call.
 624  *
 625  *      We don't look at any results here (the ReadFile may
 626  *      return data if it is cached) but do all completion
 627  *      processing in OS_Select when we get the io completion
 628  *      port done notifications.  Then we call the callback.
 629  *
 630  * Results:
 631  *      -1 if error, 0 otherwise.
 632  *
 633  * Side effects:
 634  *      Asynchronous I/O operation is queued for completion.
 635  *
 636  *--------------------------------------------------------------
 637  */
 638 int OS_AsyncRead(int fd, int offset, void *buf, int len,
 639                  OS_AsyncProc procPtr, ClientData clientData)
 640 {
 641     int index = AIO_RD_IX(fd);
 642
 643     ASSERT(asyncIoTable != NULL);
 644     asyncIoInUse = TRUE;
 645
 646     if(fd > maxFd)
 647         maxFd = fd;
 648
 649     if(index >= asyncIoTableSize) {
 650         GrowAsyncTable();
 651     }
 652
 653     ASSERT(asyncIoTable[index].inUse == 0);
 654     asyncIoTable[index].procPtr = procPtr;
 655     asyncIoTable[index].clientData = clientData;
 656     asyncIoTable[index].fd = fd;
 657     asyncIoTable[index].len = len;
 658     asyncIoTable[index].offset = offset;
 659     asyncIoTable[index].buf = buf;
 660     asyncIoTable[index].inUse = 1;
 661     FD_SET(fd, &readFdSet);
 662     return 0;
 663 }
 664
 665 /*
 666  *--------------------------------------------------------------
 667  *
 668  * OS_AsyncWrite --
 669  *
 670  *      This initiates an asynchronous write on the "fake" file
 671  *      descriptor (which may be a file, socket, or named pipe).
 672  *      We also must save the ProcPtr and ClientData, so later
 673  *      when the io completes, we know who to call.
 674  *
 675  *      We don't look at any results here (the WriteFile generally
 676  *      completes immediately) but do all completion processing
 677  *      in OS_DoIo when we get the io completion port done
 678  *      notifications.  Then we call the callback.
 679  *
 680  * Results:
 681  *      -1 if error, 0 otherwise.
 682  *
 683  * Side effects:
 684  *      Asynchronous I/O operation is queued for completion.
 685  *
 686  *--------------------------------------------------------------
 687  */
 688 int OS_AsyncWrite(int fd, int offset, void *buf, int len,
 689                   OS_AsyncProc procPtr, ClientData clientData)
 690 {
 691     int index = AIO_WR_IX(fd);
 692
 693     asyncIoInUse = TRUE;
 694
 695     if(fd > maxFd)
 696         maxFd = fd;
 697
 698     if(index >= asyncIoTableSize) {
 699         GrowAsyncTable();
 700     }
 701
 702     ASSERT(asyncIoTable[index].inUse == 0);
 703     asyncIoTable[index].procPtr = procPtr;
 704     asyncIoTable[index].clientData = clientData;
 705     asyncIoTable[index].fd = fd;
 706     asyncIoTable[index].len = len;
 707     asyncIoTable[index].offset = offset;
 708     asyncIoTable[index].buf = buf;
 709     asyncIoTable[index].inUse = 1;
 710     FD_SET(fd, &writeFdSet);
 711     return 0;
 712 }
 713
 714 /*
 715  *--------------------------------------------------------------
 716  *
 717  * OS_Close --
 718  *
 719  *      Closes the descriptor.  This is a pass through to the
 720  *      Unix close.
 721  *
 722  * Results:
 723  *      0 for success, -1 on failure
 724  *
 725  * Side effects:
 726  *      None.
 727  *
 728  *--------------------------------------------------------------
 729  */
 730 int OS_Close(int fd)
 731 {
 732     if (fd == -1)
 733         return 0;
 734
 735     if (asyncIoInUse) {
 736         int index = AIO_RD_IX(fd);
 737
 738         FD_CLR(fd, &readFdSet);
 739         FD_CLR(fd, &readFdSetPost);
 740         if (asyncIoTable[index].inUse != 0) {
 741             asyncIoTable[index].inUse = 0;
 742         }
 743
 744         FD_CLR(fd, &writeFdSet);
 745         FD_CLR(fd, &writeFdSetPost);
 746         index = AIO_WR_IX(fd);
 747         if (asyncIoTable[index].inUse != 0) {
 748             asyncIoTable[index].inUse = 0;
 749         }
 750
 751         if (maxFd == fd) {
 752             maxFd--;
 753         }
 754     }
 755     return close(fd);
 756 }
 757
 758 /*
 759  *--------------------------------------------------------------
 760  *
 761  * OS_CloseRead --
 762  *
 763  *      Cancel outstanding asynchronous reads and prevent subsequent
 764  *      reads from completing.
 765  *
 766  * Results:
 767  *      Socket or file is shutdown. Return values mimic Unix shutdown:
 768  *              0 success, -1 failure
 769  *
 770  *--------------------------------------------------------------
 771  */
 772 int OS_CloseRead(int fd)
 773 {
 774     if(asyncIoTable[AIO_RD_IX(fd)].inUse != 0) {
 775         asyncIoTable[AIO_RD_IX(fd)].inUse = 0;
 776         FD_CLR(fd, &readFdSet);
 777     }
 778
 779     return shutdown(fd, 0);
 780 }
 781
 782 /*
 783  *--------------------------------------------------------------
 784  *
 785  * OS_DoIo --
 786  *
 787  *      This function was formerly OS_Select.  It's purpose is
 788  *      to pull I/O completion events off the queue and dispatch
 789  *      them to the appropriate place.
 790  *
 791  * Results:
 792  *      Returns 0.
 793  *
 794  * Side effects:
 795  *      Handlers are called.
 796  *
 797  *--------------------------------------------------------------
 798  */
 799 int OS_DoIo(struct timeval *tmo)
 800 {
 801     int fd, len, selectStatus;
 802     OS_AsyncProc procPtr;
 803     ClientData clientData;
 804     AioInfo *aioPtr;
 805     fd_set readFdSetCpy;
 806     fd_set writeFdSetCpy;
 807
 808     asyncIoInUse = TRUE;
 809     FD_ZERO(&readFdSetCpy);
 810     FD_ZERO(&writeFdSetCpy);
 811
 812     for(fd = 0; fd <= maxFd; fd++) {
 813         if(FD_ISSET(fd, &readFdSet)) {
 814             FD_SET(fd, &readFdSetCpy);
 815         }
 816         if(FD_ISSET(fd, &writeFdSet)) {
 817             FD_SET(fd, &writeFdSetCpy);
 818         }
 819     }
 820
 821     /*
 822      * If there were no completed events from a prior call, see if there's
 823      * any work to do.
 824      */
 825     if(numRdPosted == 0 && numWrPosted == 0) {
 826         selectStatus = select((maxFd+1), &readFdSetCpy, &writeFdSetCpy,
 827                               NULL, tmo);
 828         if(selectStatus < 0) {
 829             exit(errno);
 830         }
 831
 832         for(fd = 0; fd <= maxFd; fd++) {
 833             /*
 834              * Build up a list of completed events.  We'll work off of
 835              * this list as opposed to looping through the read and write
 836              * fd sets since they can be affected by a callbacl routine.
 837              */
 838             if(FD_ISSET(fd, &readFdSetCpy)) {
 839                 numRdPosted++;
 840                 FD_SET(fd, &readFdSetPost);
 841                 FD_CLR(fd, &readFdSet);
 842             }
 843
 844             if(FD_ISSET(fd, &writeFdSetCpy)) {
 845                 numWrPosted++;
 846                 FD_SET(fd, &writeFdSetPost);
 847                 FD_CLR(fd, &writeFdSet);
 848             }
 849         }
 850     }
 851
 852     if(numRdPosted == 0 && numWrPosted == 0)
 853         return 0;
 854
 855     for(fd = 0; fd <= maxFd; fd++) {
 856         /*
 857          * Do reads and dispatch callback.
 858          */
 859         if(FD_ISSET(fd, &readFdSetPost)
 860            && asyncIoTable[AIO_RD_IX(fd)].inUse) {
 861
 862             numRdPosted--;
 863             FD_CLR(fd, &readFdSetPost);
 864             aioPtr = &asyncIoTable[AIO_RD_IX(fd)];
 865
 866             len = read(aioPtr->fd, aioPtr->buf, aioPtr->len);
 867
 868             procPtr = aioPtr->procPtr;
 869             aioPtr->procPtr = NULL;
 870             clientData = aioPtr->clientData;
 871             aioPtr->inUse = 0;
 872
 873             (*procPtr)(clientData, len);
 874         }
 875
 876         /*
 877          * Do writes and dispatch callback.
 878          */
 879         if(FD_ISSET(fd, &writeFdSetPost) &&
 880            asyncIoTable[AIO_WR_IX(fd)].inUse) {
 881
 882             numWrPosted--;
 883             FD_CLR(fd, &writeFdSetPost);
 884             aioPtr = &asyncIoTable[AIO_WR_IX(fd)];
 885
 886             len = write(aioPtr->fd, aioPtr->buf, aioPtr->len);
 887
 888             procPtr = aioPtr->procPtr;
 889             aioPtr->procPtr = NULL;
 890             clientData = aioPtr->clientData;
 891             aioPtr->inUse = 0;
 892             (*procPtr)(clientData, len);
 893         }
 894     }
 895     return 0;
 896 }
 897
 898 /*
 899  * Not all systems have strdup().
 900  * @@@ autoconf should determine whether or not this is needed, but for now..
 901  */
 902 static char * str_dup(const char * str)
 903 {
 904     char * sdup = (char *) malloc(strlen(str) + 1);
 905
 906     if (sdup)
 907         strcpy(sdup, str);
 908
 909     return sdup;
 910 }
 911
 912 /*
 913  *----------------------------------------------------------------------
 914  *
 915  * ClientAddrOK --
 916  *
 917  *      Checks if a client address is in a list of allowed addresses
 918  *
 919  * Results:
 920  *      TRUE if address list is empty or client address is present
 921  *      in the list, FALSE otherwise.
 922  *
 923  *----------------------------------------------------------------------
 924  */
 925 static int ClientAddrOK(struct sockaddr_in *saPtr, const char *clientList)
 926 {
 927     int result = FALSE;
 928     char *clientListCopy, *cur, *next;
 929
 930     if (clientList == NULL || *clientList == '\0') {
 931         return TRUE;
 932     }
 933
 934     clientListCopy = str_dup(clientList);
 935
 936     for (cur = clientListCopy; cur != NULL; cur = next) {
 937         next = strchr(cur, ',');
 938         if (next != NULL) {
 939             *next++ = '\0';
 940         }
 941         if (inet_addr(cur) == saPtr->sin_addr.s_addr) {
 942             result = TRUE;
 943             break;
 944         }
 945     }
 946
 947     free(clientListCopy);
 948     return result;
 949 }
 950
 951 /*
 952  *----------------------------------------------------------------------
 953  *
 954  * AcquireLock --
 955  *
 956  *      On platforms that implement concurrent calls to accept
 957  *      on a shared listening ipcFd, returns 0.  On other platforms,
 958  *      acquires an exclusive lock across all processes sharing a
 959  *      listening ipcFd, blocking until the lock has been acquired.
 960  *
 961  * Results:
 962  *      0 for successful call, -1 in case of system error (fatal).
 963  *
 964  * Side effects:
 965  *      This process now has the exclusive lock.
 966  *
 967  *----------------------------------------------------------------------
 968  */
 969 static int AcquireLock(int sock, int fail_on_intr)
 970 {
 971 #ifdef USE_LOCKING
 972     do {
 973         struct flock lock;
 974         lock.l_type = F_WRLCK;
 975         lock.l_start = 0;
 976         lock.l_whence = SEEK_SET;
 977         lock.l_len = 0;
 978
 979         if (fcntl(sock, F_SETLKW, &lock) != -1)
 980             return 0;
 981     } while (errno == EINTR
 982              && ! fail_on_intr
 983              && ! shutdownPending);
 984
 985     return -1;
 986
 987 #else
 988     return 0;
 989 #endif
 990 }
 991
 992 /*
 993  *----------------------------------------------------------------------
 994  *
 995  * ReleaseLock --
 996  *
 997  *      On platforms that implement concurrent calls to accept
 998  *      on a shared listening ipcFd, does nothing.  On other platforms,
 999  *      releases an exclusive lock acquired by AcquireLock.
1000  *
1001  * Results:
1002  *      0 for successful call, -1 in case of system error (fatal).
1003  *
1004  * Side effects:
1005  *      This process no longer holds the lock.
1006  *
1007  *----------------------------------------------------------------------
1008  */
1009 static int ReleaseLock(int sock)
1010 {
1011 #ifdef USE_LOCKING
1012     do {
1013         struct flock lock;
1014         lock.l_type = F_UNLCK;
1015         lock.l_start = 0;
1016         lock.l_whence = SEEK_SET;
1017         lock.l_len = 0;
1018
1019         if (fcntl(sock, F_SETLK, &lock) != -1)
1020             return 0;
1021     } while (errno == EINTR);
1022
1023     return -1;
1024
1025 #else
1026     return 0;
1027 #endif
1028 }
1029
/**********************************************************************
 * Determine if the errno resulting from a failed accept() warrants a
 * retry or exit().  Based on Apache's http_main.c accept() handling
 * and Stevens' Unix Network Programming Vol 1, 2nd Ed, para. 15.6.
 *
 * Returns 1 if accept() should simply be retried, 0 otherwise.  Each
 * error code is tested only if the platform defines it.
 */
static int is_reasonable_accept_errno (const int error)
{
#ifdef EPROTO
    /* EPROTO on certain older kernels really means ECONNABORTED, so
     * we need to ignore it for them.  See discussion in new-httpd
     * archives nh.9701 search for EPROTO.  Also see nh.9603, search
     * for EPROTO:  There is potentially a bug in Solaris 2.x x<6, and
     * other boxes that implement tcp sockets in userland (i.e. on top of
     * STREAMS).  On these systems, EPROTO can actually result in a fatal
     * loop.  See PR#981 for example.  It's hard to handle both uses of
     * EPROTO. */
    if (error == EPROTO) {
        return 1;
    }
#endif
#ifdef ECONNABORTED
    if (error == ECONNABORTED) {
        return 1;
    }
#endif
    /* Linux generates the rest of these, other tcp stacks (i.e.
     * bsd) tend to hide them behind getsockopt() interfaces.  They
     * occur when the net goes sour or the client disconnects after the
     * three-way handshake has been done in the kernel but before
     * userland has picked up the socket. */
#ifdef ECONNRESET
    if (error == ECONNRESET) {
        return 1;
    }
#endif
#ifdef ETIMEDOUT
    if (error == ETIMEDOUT) {
        return 1;
    }
#endif
#ifdef EHOSTUNREACH
    if (error == EHOSTUNREACH) {
        return 1;
    }
#endif
#ifdef ENETUNREACH
    if (error == ENETUNREACH) {
        return 1;
    }
#endif

    return 0;
}
1075
1076 /**********************************************************************
1077  * This works around a problem on Linux 2.0.x and SCO Unixware (maybe
1078  * others?).  When a connect() is made to a Unix Domain socket, but its
1079  * not accept()ed before the web server gets impatient and close()s, an
1080  * accept() results in a valid file descriptor, but no data to read.
1081  * This causes a block on the first read() - which never returns!
1082  *
1083  * Another approach to this is to write() to the socket to provoke a
1084  * SIGPIPE, but this is a pain because of the FastCGI protocol, the fact
1085  * that whatever is written has to be universally ignored by all FastCGI
1086  * web servers, and a SIGPIPE handler has to be installed which returns
1087  * (or SIGPIPE is ignored).
1088  *
1089  * READABLE_UNIX_FD_DROP_DEAD_TIMEVAL = 2,0 by default.
1090  *
1091  * Making it shorter is probably safe, but I'll leave that to you.  Making
1092  * it 0,0 doesn't work reliably.  The shorter you can reliably make it,
1093  * the faster your application will be able to recover (waiting 2 seconds
1094  * may _cause_ the problem when there is a very high demand). At any rate,
1095  * this is better than perma-blocking.
1096  */
1097 static int is_af_unix_keeper(const int fd)
1098 {
1099     struct timeval tval = { READABLE_UNIX_FD_DROP_DEAD_TIMEVAL };
1100     fd_set read_fds;
1101
1102     FD_ZERO(&read_fds);
1103     FD_SET(fd, &read_fds);
1104
1105     return select(fd + 1, &read_fds, NULL, NULL, &tval) >= 0 && FD_ISSET(fd, &read_fds);
1106 }
1107
1108 /*
1109  *----------------------------------------------------------------------
1110  *
1111  * OS_Accept --
1112  *
1113  *      Accepts a new FastCGI connection.  This routine knows whether
1114  *      we're dealing with TCP based sockets or NT Named Pipes for IPC.
1115  *
1116  * Results:
1117  *      -1 if the operation fails, otherwise this is a valid IPC fd.
1118  *
1119  * Side effects:
1120  *      New IPC connection is accepted.
1121  *
1122  *----------------------------------------------------------------------
1123  */
1124 int OS_Accept(int listen_sock, int fail_on_intr, const char *webServerAddrs)
1125 {
1126     int socket = -1;
1127     union {
1128         struct sockaddr_un un;
1129         struct sockaddr_in in;
1130     } sa;
1131
1132     for (;;) {
1133         if (AcquireLock(listen_sock, fail_on_intr))
1134             return -1;
1135
1136         for (;;) {
1137             do {
1138 #ifdef HAVE_SOCKLEN
1139                 socklen_t len = sizeof(sa);
1140 #else
1141                 int len = sizeof(sa);
1142 #endif
1143                 if (shutdownPending) break;
1144                 /* There's a window here */
1145
1146                 socket = accept(listen_sock, (struct sockaddr *)&sa, &len);
1147             } while (socket < 0
1148                      && errno == EINTR
1149                      && ! fail_on_intr
1150                      && ! shutdownPending);
1151
1152             if (socket < 0) {
1153                 if (shutdownPending || ! is_reasonable_accept_errno(errno)) {
1154                     int errnoSave = errno;
1155
1156                     ReleaseLock(listen_sock);
1157
1158                     if (! shutdownPending) {
1159                         errno = errnoSave;
1160                     }
1161
1162                     return (-1);
1163                 }
1164                 errno = 0;
1165             }
1166             else {  /* socket >= 0 */
1167                 int set = 1;
1168
1169                 if (sa.in.sin_family != AF_INET)
1170                     break;
1171
1172 #ifdef TCP_NODELAY
1173                 /* No replies to outgoing data, so disable Nagle */
1174                 setsockopt(socket, IPPROTO_TCP, TCP_NODELAY, (char *)&set, sizeof(set));
1175 #endif
1176
1177                 /* Check that the client IP address is approved */
1178                 if (ClientAddrOK(&sa.in, webServerAddrs))
1179                     break;
1180
1181                 close(socket);
1182             }  /* socket >= 0 */
1183         }  /* for(;;) */
1184
1185         if (ReleaseLock(listen_sock))
1186             return (-1);
1187
1188         if (sa.in.sin_family != AF_UNIX || is_af_unix_keeper(socket))
1189             break;
1190
1191         close(socket);
1192     }  /* while(1) - lock */
1193
1194     return (socket);
1195 }
1196
/*
 *----------------------------------------------------------------------
 *
 * OS_IpcClose
 *
 *      OS IPC routine to close an IPC connection.  On this platform
 *      it simply forwards the descriptor to OS_Close().
 *
 * Results:
 *      Whatever OS_Close() returns for ipcFd.
 *
 * Side effects:
 *      IPC connection is closed.
 *
 *----------------------------------------------------------------------
 */
int OS_IpcClose(int ipcFd)
{
    int status = OS_Close(ipcFd);

    return status;
}
1216
1217 /*
1218  *----------------------------------------------------------------------
1219  *
1220  * OS_IsFcgi --
1221  *
1222  *      Determines whether this process is a FastCGI process or not.
1223  *
1224  * Results:
1225  *      Returns 1 if FastCGI, 0 if not.
1226  *
1227  * Side effects:
1228  *      None.
1229  *
1230  *----------------------------------------------------------------------
1231  */
1232 int OS_IsFcgi(int sock)
1233 {
1234         union {
1235         struct sockaddr_in in;
1236         struct sockaddr_un un;
1237     } sa;
1238 #ifdef HAVE_SOCKLEN
1239     socklen_t len = sizeof(sa);
1240 #else
1241     int len = sizeof(sa);
1242 #endif
1243
1244     errno = 0;
1245
1246     if (getpeername(sock, (struct sockaddr *)&sa, &len) != 0 && errno == ENOTCONN) {
1247         return TRUE;
1248     }
1249     else {
1250         return FALSE;
1251     }
1252 }
1253
/*
 *----------------------------------------------------------------------
 *
 * OS_SetFlags --
 *
 *      Sets selected flag bits in an open file descriptor.  The
 *      current status flags are read with F_GETFL, OR-ed with `flags'
 *      and written back with F_SETFL, so bits that are already set
 *      stay set.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      If either fcntl() call fails the process exits with errno as
 *      its exit status.
 *
 *----------------------------------------------------------------------
 */
void OS_SetFlags(int fd, int flags)
{
    int current;

    current = fcntl(fd, F_GETFL, 0);
    if (current < 0) {
        exit(errno);
    }

    if (fcntl(fd, F_SETFL, current | flags) < 0) {
        exit(errno);
    }
}