libfcgi/os_unix.c

   1 /*
   2  * os_unix.c --
   3  *
   4  *      Description of file.
   5  *
   6  *
   7  *  Copyright (c) 1995 Open Market, Inc.
   8  *  All rights reserved.
   9  *
  10  *  This file contains proprietary and confidential information and
  11  *  remains the unpublished property of Open Market, Inc. Use,
  12  *  disclosure, or reproduction is prohibited except as permitted by
  13  *  express written license agreement with Open Market, Inc.
  14  *
  15  *  Bill Snapper
  16  *  snapper@openmarket.com
  17  */
  18
#ifndef lint
/* RCS revision identifier string; compiled out when `lint` is defined. */
static const char rcsid[] = "$Id: os_unix.c,v 1.28 2001/06/22 14:26:39 robs Exp $";
#endif /* not lint */
  22
  23 #include "fcgi_config.h"
  24
  25 #include <sys/types.h>
  26
  27 #ifdef HAVE_NETINET_IN_H
  28 #include <netinet/in.h>
  29 #endif
  30
  31 #include <arpa/inet.h>
  32 #include <assert.h>
  33 #include <errno.h>
  34 #include <fcntl.h>      /* for fcntl */
  35 #include <math.h>
  36 #include <memory.h>     /* for memchr() */
  37 #include <netinet/tcp.h>
  38 #include <stdarg.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42 #include <sys/time.h>
  43 #include <sys/un.h>
  44
  45 #ifdef HAVE_NETDB_H
  46 #include <netdb.h>
  47 #endif
  48
  49 #ifdef HAVE_SYS_SOCKET_H
  50 #include <sys/socket.h> /* for getpeername */
  51 #endif
  52
  53 #ifdef HAVE_UNISTD_H
  54 #include <unistd.h>
  55 #endif
  56
  57 #include "fastcgi.h"
  58 #include "fcgimisc.h"
  59 #include "fcgios.h"
  60
/*
 * Fallback definition used only when the system headers do not supply
 * INADDR_NONE.
 * NOTE(review): presumably inet_addr() reports failure as a 32-bit
 * all-ones value; where unsigned long is wider than 32 bits this
 * fallback would not compare equal to it -- confirm on such platforms.
 */
#ifndef INADDR_NONE
#define INADDR_NONE ((unsigned long) -1)
#endif
  64
/*
 * This structure holds an entry for each outstanding async I/O operation.
 */
typedef struct {
    OS_AsyncProc procPtr;           /* callout completion procedure */
    ClientData clientData;          /* caller private data */
    int fd;                         /* descriptor the request reads from / writes to */
    int len;                        /* byte count handed to read()/write() */
    int offset;                     /* stored by the OS_Async* calls; not consulted
                                     * by the dispatch code in this file */
    void *buf;                      /* caller-owned data buffer */
    int inUse;                      /* non-zero while the request is pending */
} AioInfo;
  77
  78 /*
  79  * Entries in the async I/O table are allocated 2 per file descriptor.
  80  *
  81  * Read Entry Index  = fd * 2
  82  * Write Entry Index = (fd * 2) + 1
  83  */
  84 #define AIO_RD_IX(fd) (fd * 2)
  85 #define AIO_WR_IX(fd) ((fd * 2) + 1)
  86
/* Set to TRUE by every async entry point and by OS_DoIo; checked by OS_Close. */
static int asyncIoInUse = FALSE;
/* Number of AioInfo slots currently allocated in asyncIoTable. */
static int asyncIoTableSize = 16;
/* Heap-allocated slot table; allocated by OS_LibInit, freed by OS_LibShutdown. */
static AioInfo *asyncIoTable = NULL;

/* TRUE between a successful OS_LibInit and the next OS_LibShutdown. */
static int libInitialized = FALSE;

/* Descriptors with a queued read / write request that select() should watch. */
static fd_set readFdSet;
static fd_set writeFdSet;

/*
 * Descriptors that select() reported ready but whose callbacks have not
 * been dispatched yet, plus a running count for each set.
 */
static fd_set readFdSetPost;
static int numRdPosted = 0;
static fd_set writeFdSetPost;
static int numWrPosted = 0;
/* Highest descriptor registered for async I/O; bounds the loops in OS_DoIo. */
static int volatile maxFd = -1;
 101
 102 /*
 103  *--------------------------------------------------------------
 104  *
 105  * OS_LibInit --
 106  *
 107  *      Set up the OS library for use.
 108  *
 109  *      NOTE: This function is really only needed for application
 110  *            asynchronous I/O.  It will most likely change in the
 111  *            future to setup the multi-threaded environment.
 112  *
 113  * Results:
 114  *      Returns 0 if success, -1 if not.
 115  *
 116  * Side effects:
 117  *      Async I/O table allocated and initialized.
 118  *
 119  *--------------------------------------------------------------
 120  */
 121 int OS_LibInit(int stdioFds[3])
 122 {
 123     if(libInitialized)
 124         return 0;
 125
 126     asyncIoTable = (AioInfo *)malloc(asyncIoTableSize * sizeof(AioInfo));
 127     if(asyncIoTable == NULL) {
 128         errno = ENOMEM;
 129         return -1;
 130     }
 131     memset((char *) asyncIoTable, 0,
 132            asyncIoTableSize * sizeof(AioInfo));
 133
 134     FD_ZERO(&readFdSet);
 135     FD_ZERO(&writeFdSet);
 136     FD_ZERO(&readFdSetPost);
 137     FD_ZERO(&writeFdSetPost);
 138     libInitialized = TRUE;
 139     return 0;
 140 }
 141
 142 /*
 143  *--------------------------------------------------------------
 144  *
 145  * OS_LibShutdown --
 146  *
 147  *      Shutdown the OS library.
 148  *
 149  * Results:
 150  *      None.
 151  *
 152  * Side effects:
 153  *      Memory freed, fds closed.
 154  *
 155  *--------------------------------------------------------------
 156  */
 157 void OS_LibShutdown()
 158 {
 159     if(!libInitialized)
 160         return;
 161
 162     free(asyncIoTable);
 163     asyncIoTable = NULL;
 164     libInitialized = FALSE;
 165     return;
 166 }
 167
 168 /*
 169  *----------------------------------------------------------------------
 170  *
 171  * OS_BuildSockAddrUn --
 172  *
 173  *      Using the pathname bindPath, fill in the sockaddr_un structure
 174  *      *servAddrPtr and the length of this structure *servAddrLen.
 175  *
 176  *      The format of the sockaddr_un structure changed incompatibly in
 177  *      4.3BSD Reno.  Digital UNIX supports both formats, other systems
 178  *      support one or the other.
 179  *
 180  * Results:
 181  *      0 for normal return, -1 for failure (bindPath too long).
 182  *
 183  *----------------------------------------------------------------------
 184  */
 185
 186 static int OS_BuildSockAddrUn(const char *bindPath,
 187                               struct sockaddr_un *servAddrPtr,
 188                               int *servAddrLen)
 189 {
 190     int bindPathLen = strlen(bindPath);
 191
 192 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 193     if(bindPathLen >= sizeof(servAddrPtr->sun_path)) {
 194         return -1;
 195     }
 196 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 197     if(bindPathLen > sizeof(servAddrPtr->sun_path)) {
 198         return -1;
 199     }
 200 #endif
 201     memset((char *) servAddrPtr, 0, sizeof(*servAddrPtr));
 202     servAddrPtr->sun_family = AF_UNIX;
 203     memcpy(servAddrPtr->sun_path, bindPath, bindPathLen);
 204 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 205     *servAddrLen = sizeof(servAddrPtr->sun_len)
 206             + sizeof(servAddrPtr->sun_family)
 207             + bindPathLen + 1;
 208     servAddrPtr->sun_len = *servAddrLen;
 209 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 210     *servAddrLen = sizeof(servAddrPtr->sun_family) + bindPathLen;
 211 #endif
 212     return 0;
 213 }
/*
 * Storage large enough for either address family used in this file, so
 * one variable can be handed to bind()/connect() for both cases.
 */
union SockAddrUnion {
    struct  sockaddr_un unixVariant;    /* Unix domain socket address */
    struct  sockaddr_in inetVariant;    /* IPv4 TCP socket address */
};
 218
 219 /*
 220  * OS_CreateLocalIpcFd --
 221  *
 222  *   This procedure is responsible for creating the listener socket
 223  *   on Unix for local process communication.  It will create a
 224  *   domain socket or a TCP/IP socket bound to "localhost" and return
 225  *   a file descriptor to it to the caller.
 226  *
 227  * Results:
 228  *      Listener socket created.  This call returns either a valid
 229  *      file descriptor or -1 on error.
 230  *
 231  * Side effects:
 232  *      None.
 233  *
 234  *----------------------------------------------------------------------
 235  */
 236 int OS_CreateLocalIpcFd(const char *bindPath, int backlog)
 237 {
 238     int listenSock, servLen;
 239     union   SockAddrUnion sa;
 240     int     tcp = FALSE;
 241     unsigned long tcp_ia;
 242     char    *tp;
 243     short   port;
 244     char    host[MAXPATHLEN];
 245
 246     strcpy(host, bindPath);
 247     if((tp = strchr(host, ':')) != 0) {
 248         *tp++ = 0;
 249         if((port = atoi(tp)) == 0) {
 250             *--tp = ':';
 251          } else {
 252             tcp = TRUE;
 253          }
 254     }
 255     if(tcp) {
 256       if (!*host || !strcmp(host,"*")) {
 257         tcp_ia = htonl(INADDR_ANY);
 258       } else {
 259         tcp_ia = inet_addr(host);
 260         if (tcp_ia == INADDR_NONE) {
 261           struct hostent * hep;
 262           hep = gethostbyname(host);
 263           if ((!hep) || (hep->h_addrtype != AF_INET || !hep->h_addr_list[0])) {
 264             fprintf(stderr, "Cannot resolve host name %s -- exiting!\n", host);
 265             exit(1);
 266           }
 267           if (hep->h_addr_list[1]) {
 268             fprintf(stderr, "Host %s has multiple addresses ---\n", host);
 269             fprintf(stderr, "you must choose one explicitly!!!\n");
 270             exit(1);
 271           }
 272           tcp_ia = ((struct in_addr *) (hep->h_addr))->s_addr;
 273         }
 274       }
 275     }
 276
 277     if(tcp) {
 278         listenSock = socket(AF_INET, SOCK_STREAM, 0);
 279         if(listenSock >= 0) {
 280             int flag = 1;
 281             if(setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
 282                           (char *) &flag, sizeof(flag)) < 0) {
 283                 fprintf(stderr, "Can't set SO_REUSEADDR.\n");
 284                 exit(1001);
 285             }
 286         }
 287     } else {
 288         listenSock = socket(AF_UNIX, SOCK_STREAM, 0);
 289     }
 290     if(listenSock < 0) {
 291         return -1;
 292     }
 293
 294     /*
 295      * Bind the listening socket.
 296      */
 297     if(tcp) {
 298         memset((char *) &sa.inetVariant, 0, sizeof(sa.inetVariant));
 299         sa.inetVariant.sin_family = AF_INET;
 300         sa.inetVariant.sin_addr.s_addr = tcp_ia;
 301         sa.inetVariant.sin_port = htons(port);
 302         servLen = sizeof(sa.inetVariant);
 303     } else {
 304         unlink(bindPath);
 305         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 306             fprintf(stderr, "Listening socket's path name is too long.\n");
 307             exit(1000);
 308         }
 309     }
 310     if(bind(listenSock, (struct sockaddr *) &sa.unixVariant, servLen) < 0
 311        || listen(listenSock, backlog) < 0) {
 312         perror("bind/listen");
 313         exit(errno);
 314     }
 315
 316     return listenSock;
 317 }
 318
 319 /*
 320  *----------------------------------------------------------------------
 321  *
 322  * OS_FcgiConnect --
 323  *
 324  *      Create the socket and connect to the remote application if
 325  *      possible.
 326  *
 327  *      This was lifted from the cgi-fcgi application and was abstracted
 328  *      out because Windows NT does not have a domain socket and must
 329  *      use a named pipe which has a different API altogether.
 330  *
 331  * Results:
 332  *      -1 if fail or a valid file descriptor if connection succeeds.
 333  *
 334  * Side effects:
 335  *      Remote connection established.
 336  *
 337  *----------------------------------------------------------------------
 338  */
 339 int OS_FcgiConnect(char *bindPath)
 340 {
 341     union   SockAddrUnion sa;
 342     int servLen, resultSock;
 343     int connectStatus;
 344     char    *tp;
 345     char    host[MAXPATHLEN];
 346     short   port;
 347     int     tcp = FALSE;
 348
 349     strcpy(host, bindPath);
 350     if((tp = strchr(host, ':')) != 0) {
 351         *tp++ = 0;
 352         if((port = atoi(tp)) == 0) {
 353             *--tp = ':';
 354          } else {
 355             tcp = TRUE;
 356          }
 357     }
 358     if(tcp == TRUE) {
 359         struct  hostent *hp;
 360         if((hp = gethostbyname((*host ? host : "localhost"))) == NULL) {
 361             fprintf(stderr, "Unknown host: %s\n", bindPath);
 362             exit(1000);
 363         }
 364         sa.inetVariant.sin_family = AF_INET;
 365         memcpy(&sa.inetVariant.sin_addr, hp->h_addr, hp->h_length);
 366         sa.inetVariant.sin_port = htons(port);
 367         servLen = sizeof(sa.inetVariant);
 368         resultSock = socket(AF_INET, SOCK_STREAM, 0);
 369     } else {
 370         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 371             fprintf(stderr, "Listening socket's path name is too long.\n");
 372             exit(1000);
 373         }
 374         resultSock = socket(AF_UNIX, SOCK_STREAM, 0);
 375     }
 376
 377     ASSERT(resultSock >= 0);
 378     connectStatus = connect(resultSock, (struct sockaddr *) &sa.unixVariant,
 379                              servLen);
 380     if(connectStatus >= 0) {
 381         return resultSock;
 382     } else {
 383         /*
 384          * Most likely (errno == ENOENT || errno == ECONNREFUSED)
 385          * and no FCGI application server is running.
 386          */
 387         close(resultSock);
 388         return -1;
 389     }
 390 }
 391
 392 /*
 393  *--------------------------------------------------------------
 394  *
 395  * OS_Read --
 396  *
 397  *      Pass through to the unix read function.
 398  *
 399  * Results:
 400  *      Returns number of byes read, 0, or -1 failure: errno
 401  *      contains actual error.
 402  *
 403  * Side effects:
 404  *      None.
 405  *
 406  *--------------------------------------------------------------
 407  */
 408 int OS_Read(int fd, char * buf, size_t len)
 409 {
 410     return(read(fd, buf, len));
 411 }
 412
 413 /*
 414  *--------------------------------------------------------------
 415  *
 416  * OS_Write --
 417  *
 418  *      Pass through to unix write function.
 419  *
 420  * Results:
 421  *      Returns number of byes read, 0, or -1 failure: errno
 422  *      contains actual error.
 423  *
 424  * Side effects:
 425  *      none.
 426  *
 427  *--------------------------------------------------------------
 428  */
 429 int OS_Write(int fd, char * buf, size_t len)
 430 {
 431     return(write(fd, buf, len));
 432 }
 433
 434 /*
 435  *----------------------------------------------------------------------
 436  *
 437  * OS_SpawnChild --
 438  *
 439  *      Spawns a new FastCGI listener process.
 440  *
 441  * Results:
 442  *      0 if success, -1 if error.
 443  *
 444  * Side effects:
 445  *      Child process spawned.
 446  *
 447  *----------------------------------------------------------------------
 448  */
 449 int OS_SpawnChild(char *appPath, int listenFd)
 450 {
 451     int forkResult;
 452
 453     forkResult = fork();
 454     if(forkResult < 0) {
 455         exit(errno);
 456     }
 457
 458     if(forkResult == 0) {
 459         /*
 460          * Close STDIN unconditionally.  It's used by the parent
 461          * process for CGI communication.  The FastCGI applciation
 462          * will be replacing this with the FastCGI listenFd IF
 463          * STDIN_FILENO is the same as FCGI_LISTENSOCK_FILENO
 464          * (which it is on Unix).  Regardless, STDIN, STDOUT, and
 465          * STDERR will be closed as the FastCGI process uses a
 466          * multiplexed socket in their place.
 467          */
 468         close(STDIN_FILENO);
 469
 470         /*
 471          * If the listenFd is already the value of FCGI_LISTENSOCK_FILENO
 472          * we're set.  If not, change it so the child knows where to
 473          * get the listen socket from.
 474          */
 475         if(listenFd != FCGI_LISTENSOCK_FILENO) {
 476             dup2(listenFd, FCGI_LISTENSOCK_FILENO);
 477             close(listenFd);
 478         }
 479
 480         close(STDOUT_FILENO);
 481         close(STDERR_FILENO);
 482
 483         /*
 484          * We're a child.  Exec the application.
 485          *
 486          * XXX: entire environment passes through
 487          */
 488         execl(appPath, appPath, NULL);
 489         /*
 490          * XXX: Can't do this as we've already closed STDERR!!!
 491          *
 492          * perror("exec");
 493          */
 494         exit(errno);
 495     }
 496     return 0;
 497 }
 498
 499 /*
 500  *--------------------------------------------------------------
 501  *
 502  * OS_AsyncReadStdin --
 503  *
 504  *      This initiates an asynchronous read on the standard
 505  *      input handle.
 506  *
 507  *      The abstraction is necessary because Windows NT does not
 508  *      have a clean way of "select"ing a file descriptor for
 509  *      I/O.
 510  *
 511  * Results:
 512  *      -1 if error, 0 otherwise.
 513  *
 514  * Side effects:
 515  *      Asynchronous bit is set in the readfd variable and
 516  *      request is enqueued.
 517  *
 518  *--------------------------------------------------------------
 519  */
 520 int OS_AsyncReadStdin(void *buf, int len, OS_AsyncProc procPtr,
 521                       ClientData clientData)
 522 {
 523     int index = AIO_RD_IX(STDIN_FILENO);
 524
 525     asyncIoInUse = TRUE;
 526     ASSERT(asyncIoTable[index].inUse == 0);
 527     asyncIoTable[index].procPtr = procPtr;
 528     asyncIoTable[index].clientData = clientData;
 529     asyncIoTable[index].fd = STDIN_FILENO;
 530     asyncIoTable[index].len = len;
 531     asyncIoTable[index].offset = 0;
 532     asyncIoTable[index].buf = buf;
 533     asyncIoTable[index].inUse = 1;
 534     FD_SET(STDIN_FILENO, &readFdSet);
 535     if(STDIN_FILENO > maxFd)
 536         maxFd = STDIN_FILENO;
 537     return 0;
 538 }
 539
 540 static void GrowAsyncTable(void)
 541 {
 542     int oldTableSize = asyncIoTableSize;
 543
 544     asyncIoTableSize = asyncIoTableSize * 2;
 545     asyncIoTable = (AioInfo *)realloc(asyncIoTable, asyncIoTableSize * sizeof(AioInfo));
 546     if(asyncIoTable == NULL) {
 547         errno = ENOMEM;
 548         exit(errno);
 549     }
 550     memset((char *) &asyncIoTable[oldTableSize], 0,
 551            oldTableSize * sizeof(AioInfo));
 552
 553 }
 554
 555 /*
 556  *--------------------------------------------------------------
 557  *
 558  * OS_AsyncRead --
 559  *
 560  *      This initiates an asynchronous read on the file
 561  *      handle which may be a socket or named pipe.
 562  *
 563  *      We also must save the ProcPtr and ClientData, so later
 564  *      when the io completes, we know who to call.
 565  *
 566  *      We don't look at any results here (the ReadFile may
 567  *      return data if it is cached) but do all completion
 568  *      processing in OS_Select when we get the io completion
 569  *      port done notifications.  Then we call the callback.
 570  *
 571  * Results:
 572  *      -1 if error, 0 otherwise.
 573  *
 574  * Side effects:
 575  *      Asynchronous I/O operation is queued for completion.
 576  *
 577  *--------------------------------------------------------------
 578  */
 579 int OS_AsyncRead(int fd, int offset, void *buf, int len,
 580                  OS_AsyncProc procPtr, ClientData clientData)
 581 {
 582     int index = AIO_RD_IX(fd);
 583
 584     ASSERT(asyncIoTable != NULL);
 585     asyncIoInUse = TRUE;
 586
 587     if(fd > maxFd)
 588         maxFd = fd;
 589
 590     if(index >= asyncIoTableSize) {
 591         GrowAsyncTable();
 592     }
 593
 594     ASSERT(asyncIoTable[index].inUse == 0);
 595     asyncIoTable[index].procPtr = procPtr;
 596     asyncIoTable[index].clientData = clientData;
 597     asyncIoTable[index].fd = fd;
 598     asyncIoTable[index].len = len;
 599     asyncIoTable[index].offset = offset;
 600     asyncIoTable[index].buf = buf;
 601     asyncIoTable[index].inUse = 1;
 602     FD_SET(fd, &readFdSet);
 603     return 0;
 604 }
 605
 606 /*
 607  *--------------------------------------------------------------
 608  *
 609  * OS_AsyncWrite --
 610  *
 611  *      This initiates an asynchronous write on the "fake" file
 612  *      descriptor (which may be a file, socket, or named pipe).
 613  *      We also must save the ProcPtr and ClientData, so later
 614  *      when the io completes, we know who to call.
 615  *
 616  *      We don't look at any results here (the WriteFile generally
 617  *      completes immediately) but do all completion processing
 618  *      in OS_DoIo when we get the io completion port done
 619  *      notifications.  Then we call the callback.
 620  *
 621  * Results:
 622  *      -1 if error, 0 otherwise.
 623  *
 624  * Side effects:
 625  *      Asynchronous I/O operation is queued for completion.
 626  *
 627  *--------------------------------------------------------------
 628  */
 629 int OS_AsyncWrite(int fd, int offset, void *buf, int len,
 630                   OS_AsyncProc procPtr, ClientData clientData)
 631 {
 632     int index = AIO_WR_IX(fd);
 633
 634     asyncIoInUse = TRUE;
 635
 636     if(fd > maxFd)
 637         maxFd = fd;
 638
 639     if(index >= asyncIoTableSize) {
 640         GrowAsyncTable();
 641     }
 642
 643     ASSERT(asyncIoTable[index].inUse == 0);
 644     asyncIoTable[index].procPtr = procPtr;
 645     asyncIoTable[index].clientData = clientData;
 646     asyncIoTable[index].fd = fd;
 647     asyncIoTable[index].len = len;
 648     asyncIoTable[index].offset = offset;
 649     asyncIoTable[index].buf = buf;
 650     asyncIoTable[index].inUse = 1;
 651     FD_SET(fd, &writeFdSet);
 652     return 0;
 653 }
 654
 655 /*
 656  *--------------------------------------------------------------
 657  *
 658  * OS_Close --
 659  *
 660  *      Closes the descriptor.  This is a pass through to the
 661  *      Unix close.
 662  *
 663  * Results:
 664  *      0 for success, -1 on failure
 665  *
 666  * Side effects:
 667  *      None.
 668  *
 669  *--------------------------------------------------------------
 670  */
 671 int OS_Close(int fd)
 672 {
 673     if (fd == -1)
 674         return 0;
 675
 676     if (asyncIoInUse) {
 677         int index = AIO_RD_IX(fd);
 678
 679         FD_CLR(fd, &readFdSet);
 680         FD_CLR(fd, &readFdSetPost);
 681         if (asyncIoTable[index].inUse != 0) {
 682             asyncIoTable[index].inUse = 0;
 683         }
 684
 685         FD_CLR(fd, &writeFdSet);
 686         FD_CLR(fd, &writeFdSetPost);
 687         index = AIO_WR_IX(fd);
 688         if (asyncIoTable[index].inUse != 0) {
 689             asyncIoTable[index].inUse = 0;
 690         }
 691
 692         if (maxFd == fd) {
 693             maxFd--;
 694         }
 695     }
 696     return close(fd);
 697 }
 698
 699 /*
 700  *--------------------------------------------------------------
 701  *
 702  * OS_CloseRead --
 703  *
 704  *      Cancel outstanding asynchronous reads and prevent subsequent
 705  *      reads from completing.
 706  *
 707  * Results:
 708  *      Socket or file is shutdown. Return values mimic Unix shutdown:
 709  *              0 success, -1 failure
 710  *
 711  *--------------------------------------------------------------
 712  */
 713 int OS_CloseRead(int fd)
 714 {
 715     if(asyncIoTable[AIO_RD_IX(fd)].inUse != 0) {
 716         asyncIoTable[AIO_RD_IX(fd)].inUse = 0;
 717         FD_CLR(fd, &readFdSet);
 718     }
 719
 720     return shutdown(fd, 0);
 721 }
 722
/*
 *--------------------------------------------------------------
 *
 * OS_DoIo --
 *
 *      This function was formerly OS_Select.  Its purpose is
 *      to pull I/O completion events off the queue and dispatch
 *      them to the appropriate place.
 *
 *      tmo is passed unchanged to select() as its timeout.
 *      select() is only called when no posted events remain from
 *      an earlier call.  For each posted descriptor the queued
 *      read() or write() is performed and the stored callback is
 *      invoked with the caller's ClientData and the byte count
 *      (or -1) returned by that call.
 *
 * Results:
 *      Returns 0.  A select() failure terminates the process
 *      with exit(errno).
 *
 * Side effects:
 *      Handlers are called.
 *
 *--------------------------------------------------------------
 */
int OS_DoIo(struct timeval *tmo)
{
    int fd, len, selectStatus;
    OS_AsyncProc procPtr;
    ClientData clientData;
    AioInfo *aioPtr;
    fd_set readFdSetCpy;
    fd_set writeFdSetCpy;

    asyncIoInUse = TRUE;
    FD_ZERO(&readFdSetCpy);
    FD_ZERO(&writeFdSetCpy);

    /*
     * select() overwrites the sets it is given, so work on copies of
     * the pending read and write sets.
     */
    for(fd = 0; fd <= maxFd; fd++) {
        if(FD_ISSET(fd, &readFdSet)) {
            FD_SET(fd, &readFdSetCpy);
        }
        if(FD_ISSET(fd, &writeFdSet)) {
            FD_SET(fd, &writeFdSetCpy);
        }
    }

    /*
     * If there were no completed events from a prior call, see if there's
     * any work to do.
     */
    if(numRdPosted == 0 && numWrPosted == 0) {
        selectStatus = select((maxFd+1), &readFdSetCpy, &writeFdSetCpy,
                              NULL, tmo);
        /*
         * NOTE(review): every select() error, including an interrupted
         * call (EINTR), ends the process here -- confirm this is intended.
         */
        if(selectStatus < 0) {
            exit(errno);
        }

        for(fd = 0; fd <= maxFd; fd++) {
            /*
             * Build up a list of completed events.  We'll work off of
             * this list as opposed to looping through the read and write
             * fd sets since they can be affected by a callback routine.
             */
            if(FD_ISSET(fd, &readFdSetCpy)) {
                numRdPosted++;
                FD_SET(fd, &readFdSetPost);
                FD_CLR(fd, &readFdSet);
            }

            if(FD_ISSET(fd, &writeFdSetCpy)) {
                numWrPosted++;
                FD_SET(fd, &writeFdSetPost);
                FD_CLR(fd, &writeFdSet);
            }
        }
    }

    /* Nothing became ready (for example the timeout expired). */
    if(numRdPosted == 0 && numWrPosted == 0)
        return 0;

    /*
     * NOTE(review): a posted descriptor whose slot is no longer in use is
     * skipped below without decrementing its counter, so the counter can
     * stay non-zero and suppress later select() calls -- verify against
     * the paths that clear inUse.
     */
    for(fd = 0; fd <= maxFd; fd++) {
        /*
         * Do reads and dispatch callback.
         */
        if(FD_ISSET(fd, &readFdSetPost)
           && asyncIoTable[AIO_RD_IX(fd)].inUse) {

            numRdPosted--;
            FD_CLR(fd, &readFdSetPost);
            aioPtr = &asyncIoTable[AIO_RD_IX(fd)];

            len = read(aioPtr->fd, aioPtr->buf, aioPtr->len);

            /*
             * Copy what the callback needs and release the slot before
             * calling it, so the slot is free if the callback queues a
             * new request on this descriptor.
             */
            procPtr = aioPtr->procPtr;
            aioPtr->procPtr = NULL;
            clientData = aioPtr->clientData;
            aioPtr->inUse = 0;

            (*procPtr)(clientData, len);
        }

        /*
         * Do writes and dispatch callback.
         */
        if(FD_ISSET(fd, &writeFdSetPost) &&
           asyncIoTable[AIO_WR_IX(fd)].inUse) {

            numWrPosted--;
            FD_CLR(fd, &writeFdSetPost);
            aioPtr = &asyncIoTable[AIO_WR_IX(fd)];

            len = write(aioPtr->fd, aioPtr->buf, aioPtr->len);

            /* Same release-before-callback ordering as for reads. */
            procPtr = aioPtr->procPtr;
            aioPtr->procPtr = NULL;
            clientData = aioPtr->clientData;
            aioPtr->inUse = 0;
            (*procPtr)(clientData, len);
        }
    }
    return 0;
}
 838
 839 /*
 840  * Not all systems have strdup().
 841  * @@@ autoconf should determine whether or not this is needed, but for now..
 842  */
 843 static char * str_dup(const char * str)
 844 {
 845     char * sdup = (char *) malloc(strlen(str) + 1);
 846
 847     if (sdup)
 848         strcpy(sdup, str);
 849
 850     return sdup;
 851 }
 852
 853 /*
 854  *----------------------------------------------------------------------
 855  *
 856  * ClientAddrOK --
 857  *
 858  *      Checks if a client address is in a list of allowed addresses
 859  *
 860  * Results:
 861  *      TRUE if address list is empty or client address is present
 862  *      in the list, FALSE otherwise.
 863  *
 864  *----------------------------------------------------------------------
 865  */
 866 static int ClientAddrOK(struct sockaddr_in *saPtr, const char *clientList)
 867 {
 868     int result = FALSE;
 869     char *clientListCopy, *cur, *next;
 870
 871     if (clientList == NULL || *clientList == '\0') {
 872         return TRUE;
 873     }
 874
 875     clientListCopy = str_dup(clientList);
 876
 877     for (cur = clientListCopy; cur != NULL; cur = next) {
 878         next = strchr(cur, ',');
 879         if (next != NULL) {
 880             *next++ = '\0';
 881         }
 882         if (inet_addr(cur) == saPtr->sin_addr.s_addr) {
 883             result = TRUE;
 884             break;
 885         }
 886     }
 887
 888     free(clientListCopy);
 889     return result;
 890 }
 891
 892 /*
 893  *----------------------------------------------------------------------
 894  *
 895  * AcquireLock --
 896  *
 897  *      On platforms that implement concurrent calls to accept
 898  *      on a shared listening ipcFd, returns 0.  On other platforms,
 899  *      acquires an exclusive lock across all processes sharing a
 900  *      listening ipcFd, blocking until the lock has been acquired.
 901  *
 902  * Results:
 903  *      0 for successful call, -1 in case of system error (fatal).
 904  *
 905  * Side effects:
 906  *      This process now has the exclusive lock.
 907  *
 908  *----------------------------------------------------------------------
 909  */
 910 static int AcquireLock(int sock, int fail_on_intr)
 911 {
 912 #ifdef USE_LOCKING
 913     do {
 914         struct flock lock;
 915         lock.l_type = F_WRLCK;
 916         lock.l_start = 0;
 917         lock.l_whence = SEEK_SET;
 918         lock.l_len = 0;
 919
 920         if (fcntl(sock, F_SETLKW, &lock) != -1)
 921             return 0;
 922     } while (errno == EINTR && !fail_on_intr);
 923
 924     return -1;
 925
 926 #else
 927     return 0;
 928 #endif
 929 }
 930
 931 /*
 932  *----------------------------------------------------------------------
 933  *
 934  * ReleaseLock --
 935  *
 936  *      On platforms that implement concurrent calls to accept
 937  *      on a shared listening ipcFd, does nothing.  On other platforms,
 938  *      releases an exclusive lock acquired by AcquireLock.
 939  *
 940  * Results:
 941  *      0 for successful call, -1 in case of system error (fatal).
 942  *
 943  * Side effects:
 944  *      This process no longer holds the lock.
 945  *
 946  *----------------------------------------------------------------------
 947  */
 948 static int ReleaseLock(int sock)
 949 {
 950 #ifdef USE_LOCKING
 951     do {
 952         struct flock lock;
 953         lock.l_type = F_UNLCK;
 954         lock.l_start = 0;
 955         lock.l_whence = SEEK_SET;
 956         lock.l_len = 0;
 957
 958         if (fcntl(sock, F_SETLK, &lock) != -1)
 959             return 0;
 960     } while (errno == EINTR);
 961
 962     return -1;
 963
 964 #else
 965     return 0;
 966 #endif
 967 }
 968
 969 /**********************************************************************
 970  * Determine if the errno resulting from a failed accept() warrants a
 971  * retry or exit().  Based on Apache's http_main.c accept() handling
 972  * and Stevens' Unix Network Programming Vol 1, 2nd Ed, para. 15.6.
 973  */
 974 static int is_reasonable_accept_errno (const int error)
 975 {
 976     switch (error) {
 977 #ifdef EPROTO
 978         /* EPROTO on certain older kernels really means ECONNABORTED, so
 979          * we need to ignore it for them.  See discussion in new-httpd
 980          * archives nh.9701 search for EPROTO.  Also see nh.9603, search
 981          * for EPROTO:  There is potentially a bug in Solaris 2.x x<6, and
 982          * other boxes that implement tcp sockets in userland (i.e. on top of
 983          * STREAMS).  On these systems, EPROTO can actually result in a fatal
 984          * loop.  See PR#981 for example.  It's hard to handle both uses of
 985          * EPROTO. */
 986         case EPROTO:
 987 #endif
 988 #ifdef ECONNABORTED
 989         case ECONNABORTED:
 990 #endif
 991         /* Linux generates the rest of these, other tcp stacks (i.e.
 992          * bsd) tend to hide them behind getsockopt() interfaces.  They
 993          * occur when the net goes sour or the client disconnects after the
 994          * three-way handshake has been done in the kernel but before
 995          * userland has picked up the socket. */
 996 #ifdef ECONNRESET
 997         case ECONNRESET:
 998 #endif
 999 #ifdef ETIMEDOUT
1000         case ETIMEDOUT:
1001 #endif
1002 #ifdef EHOSTUNREACH
1003         case EHOSTUNREACH:
1004 #endif
1005 #ifdef ENETUNREACH
1006         case ENETUNREACH:
1007 #endif
1008             return 1;
1009
1010         default:
1011             return 0;
1012     }
1013 }
1014
1015 /**********************************************************************
1016  * This works around a problem on Linux 2.0.x and SCO Unixware (maybe
1017  * others?).  When a connect() is made to a Unix Domain socket, but its
1018  * not accept()ed before the web server gets impatient and close()s, an
1019  * accept() results in a valid file descriptor, but no data to read.
1020  * This causes a block on the first read() - which never returns!
1021  *
1022  * Another approach to this is to write() to the socket to provoke a
1023  * SIGPIPE, but this is a pain because of the FastCGI protocol, the fact
1024  * that whatever is written has to be universally ignored by all FastCGI
1025  * web servers, and a SIGPIPE handler has to be installed which returns
1026  * (or SIGPIPE is ignored).
1027  *
1028  * READABLE_UNIX_FD_DROP_DEAD_TIMEVAL = 2,0 by default.
1029  *
1030  * Making it shorter is probably safe, but I'll leave that to you.  Making
1031  * it 0,0 doesn't work reliably.  The shorter you can reliably make it,
1032  * the faster your application will be able to recover (waiting 2 seconds
1033  * may _cause_ the problem when there is a very high demand). At any rate,
1034  * this is better than perma-blocking.
1035  */
1036 static int is_af_unix_keeper(const int fd)
1037 {
1038     struct timeval tval = { READABLE_UNIX_FD_DROP_DEAD_TIMEVAL };
1039     fd_set read_fds;
1040
1041     FD_ZERO(&read_fds);
1042     FD_SET(fd, &read_fds);
1043
1044     return select(fd + 1, &read_fds, NULL, NULL, &tval) >= 0 && FD_ISSET(fd, &read_fds);
1045 }
1046
/*
 *----------------------------------------------------------------------
 *
 * OS_Accept --
 *
 *      Accepts a new FastCGI connection on listen_sock.  The listening
 *      socket may be a TCP (AF_INET) socket or a Unix domain (AF_UNIX)
 *      socket; the address family reported by accept() decides how the
 *      new connection is vetted:
 *
 *        - AF_INET:  Nagle is disabled (where TCP_NODELAY exists) and
 *          the peer address is checked with ClientAddrOK() against
 *          webServerAddrs.  Rejected peers are closed and accept() is
 *          retried while the lock is still held.
 *        - AF_UNIX:  after the lock is released the connection is probed
 *          with is_af_unix_keeper(); a dead connection is closed and the
 *          whole lock/accept cycle starts over.
 *        - anything else is returned as is.
 *
 *      accept() is wrapped in AcquireLock()/ReleaseLock() on
 *      listen_sock.  An accept() interrupted by a signal is retried
 *      unless fail_on_intr is non-zero.  Errors for which
 *      is_reasonable_accept_errno() returns true are ignored and
 *      accept() is retried.
 *
 * Results:
 *      -1 if the operation fails, otherwise this is a valid IPC fd.
 *      When accept() itself fails, errno still holds accept()'s error
 *      on return.
 *
 * Side effects:
 *      New IPC connection is accepted.  No descriptor is left open on
 *      any failure path.
 *
 *----------------------------------------------------------------------
 */
int OS_Accept(int listen_sock, int fail_on_intr, const char *webServerAddrs)
{
    int connFd;     /* not called "socket": that would shadow socket() */
    union {
        struct sockaddr_un un;
        struct sockaddr_in in;
    } sa;

    for (;;) {
        if (AcquireLock(listen_sock, fail_on_intr))
            return -1;

        for (;;) {
            do {
#ifdef HAVE_SOCKLEN
                socklen_t len = sizeof(sa);
#else
                int len = sizeof(sa);
#endif
                /* accept() may store a short (or no) peer address, e.g.
                 * for an unnamed Unix domain client.  Clear the buffer so
                 * the sin_family tests below never read indeterminate
                 * bytes. */
                memset(&sa, 0, sizeof(sa));
                connFd = accept(listen_sock, (struct sockaddr *)&sa, &len);
            } while (connFd < 0 && errno == EINTR && !fail_on_intr);

            if (connFd < 0) {
                if (!is_reasonable_accept_errno(errno)) {
                    /* Hard failure: drop the lock but report accept()'s
                     * errno, not whatever ReleaseLock() leaves behind. */
                    int errnoSave = errno;
                    ReleaseLock(listen_sock);
                    errno = errnoSave;
                    return (-1);
                }
                /* Transient per-connection failure: forget it and retry. */
                errno = 0;
            }
            else {  /* connFd >= 0 */
                int set = 1;

                /* Only TCP peers need the option and address checks. */
                if (sa.in.sin_family != AF_INET)
                    break;

#ifdef TCP_NODELAY
                /* No replies to outgoing data, so disable Nagle */
                setsockopt(connFd, IPPROTO_TCP, TCP_NODELAY, (char *)&set, sizeof(set));
#endif

                /* Check that the client IP address is approved */
                if (ClientAddrOK(&sa.in, webServerAddrs))
                    break;

                close(connFd);
            }  /* connFd >= 0 */
        }  /* for(;;) */

        if (ReleaseLock(listen_sock)) {
            /* The connection was accepted but cannot be handed out;
             * close it so the descriptor is not leaked, keeping the
             * errno from ReleaseLock() for the caller. */
            int errnoSave = errno;
            close(connFd);
            errno = errnoSave;
            return (-1);
        }

        if (sa.in.sin_family != AF_UNIX || is_af_unix_keeper(connFd))
            break;

        /* Dead Unix domain connection: discard it and start over. */
        close(connFd);
    }  /* for(;;) - lock */

    return (connFd);
}
1124
/*
 *----------------------------------------------------------------------
 *
 * OS_IpcClose
 *
 *      OS IPC routine to close an IPC connection.  This is a thin
 *      wrapper that forwards the descriptor to OS_Close().
 *
 * Results:
 *      Whatever OS_Close() returns for ipcFd.
 *
 * Side effects:
 *      IPC connection is closed.
 *
 *----------------------------------------------------------------------
 */
int OS_IpcClose(int ipcFd)
{
    const int status = OS_Close(ipcFd);

    return status;
}
1144
1145 /*
1146  *----------------------------------------------------------------------
1147  *
1148  * OS_IsFcgi --
1149  *
1150  *      Determines whether this process is a FastCGI process or not.
1151  *
1152  * Results:
1153  *      Returns 1 if FastCGI, 0 if not.
1154  *
1155  * Side effects:
1156  *      None.
1157  *
1158  *----------------------------------------------------------------------
1159  */
1160 int OS_IsFcgi(int sock)
1161 {
1162         union {
1163         struct sockaddr_in in;
1164         struct sockaddr_un un;
1165     } sa;
1166 #ifdef HAVE_SOCKLEN
1167     socklen_t len = sizeof(sa);
1168 #else
1169     int len = sizeof(sa);
1170 #endif
1171
1172     errno = 0;
1173
1174     if (getpeername(sock, (struct sockaddr *)&sa, &len) != 0 && errno == ENOTCONN) {
1175         return TRUE;
1176     }
1177     else {
1178         return FALSE;
1179     }
1180 }
1181
/*
 *----------------------------------------------------------------------
 *
 * OS_SetFlags --
 *
 *      Sets selected flag bits in an open file descriptor.  The current
 *      file status flags are read with F_GETFL, OR-ed with flags and
 *      written back with F_SETFL, so bits that are already set stay set.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      If either fcntl() call fails the process terminates via
 *      exit(errno).
 *
 *----------------------------------------------------------------------
 */
void OS_SetFlags(int fd, int flags)
{
    int current = fcntl(fd, F_GETFL, 0);

    if (current < 0) {
        exit(errno);
    }

    if (fcntl(fd, F_SETFL, current | flags) < 0) {
        exit(errno);
    }
}