libfcgi/os_unix.c

   1 /*
   2  * os_unix.c --
   3  *
   4  *      Description of file.
   5  *
   6  *
   7  *  Copyright (c) 1995 Open Market, Inc.
   8  *  All rights reserved.
   9  *
  10  *  This file contains proprietary and confidential information and
  11  *  remains the unpublished property of Open Market, Inc. Use,
  12  *  disclosure, or reproduction is prohibited except as permitted by
  13  *  express written license agreement with Open Market, Inc.
  14  *
  15  *  Bill Snapper
  16  *  snapper@openmarket.com
  17  */
  18
  19 #ifndef lint
  20 static const char rcsid[] = "$Id: os_unix.c,v 1.14 1999/08/12 23:56:11 roberts Exp $";
  21 #endif /* not lint */
  22
  23 #include "fcgi_config.h"
  24
  25 #include <arpa/inet.h>
  26 #include <assert.h>
  27 #include <errno.h>
  28 #include <fcntl.h>      /* for fcntl */
  29 #include <math.h>
  30 #include <memory.h>     /* for memchr() */
  31 #include <netinet/tcp.h>
  32 #include <stdarg.h>
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <string.h>
  36 #include <sys/time.h>
  37 #include <sys/types.h>
  38 #include <sys/un.h>
  39
  40 #ifdef HAVE_NETDB_H
  41 #include <netdb.h>
  42 #endif
  43
  44 #ifdef HAVE_NETINET_IN_H
  45 #include <netinet/in.h>
  46 #endif
  47
  48 #ifdef HAVE_SYS_SOCKET_H
  49 #include <sys/socket.h> /* for getpeername */
  50 #endif
  51
  52 #ifdef HAVE_UNISTD_H
  53 #include <unistd.h>
  54 #endif
  55
  56 #include "fastcgi.h"
  57 #include "fcgiapp.h"
  58 #include "fcgiappmisc.h"
  59 #include "fcgimisc.h"
  60 #include "fcgios.h"
  61
  62 #ifndef FALSE
  63 #define FALSE 0
  64 #endif
  65
  66 #ifndef TRUE
  67 #define TRUE 1
  68 #endif
  69
  70 /*
  71  * This structure holds an entry for each oustanding async I/O operation.
  72  */
  73 typedef struct {
  74     OS_AsyncProc procPtr;           /* callout completion procedure */
  75     ClientData clientData;          /* caller private data */
  76     int fd;
  77     int len;
  78     int offset;
  79     void *buf;
  80     int inUse;
  81 } AioInfo;
  82
  83 /*
  84  * Entries in the async I/O table are allocated 2 per file descriptor.
  85  *
  86  * Read Entry Index  = fd * 2
  87  * Write Entry Index = (fd * 2) + 1
  88  */
  89 #define AIO_RD_IX(fd) (fd * 2)
  90 #define AIO_WR_IX(fd) ((fd * 2) + 1)
  91
  92 static int asyncIoTableSize = 16;
  93 static AioInfo *asyncIoTable = NULL;
  94
  95 static int libInitialized = FALSE;
  96
  97 static fd_set readFdSet;
  98 static fd_set writeFdSet;
  99
 100 static fd_set readFdSetPost;
 101 static int numRdPosted = 0;
 102 static fd_set writeFdSetPost;
 103 static int numWrPosted = 0;
 104 static int volatile maxFd = -1;
 105
 106 \f
 107 /*
 108  *--------------------------------------------------------------
 109  *
 110  * OS_LibInit --
 111  *
 112  *      Set up the OS library for use.
 113  *
 114  *      NOTE: This function is really only needed for application
 115  *            asynchronous I/O.  It will most likely change in the
 116  *            future to setup the multi-threaded environment.
 117  *
 118  * Results:
 119  *      Returns 0 if success, -1 if not.
 120  *
 121  * Side effects:
 122  *      Async I/O table allocated and initialized.
 123  *
 124  *--------------------------------------------------------------
 125  */
 126 int OS_LibInit(int stdioFds[3])
 127 {
 128     if(libInitialized)
 129         return 0;
 130
 131     asyncIoTable = (AioInfo *)malloc(asyncIoTableSize * sizeof(AioInfo));
 132     if(asyncIoTable == NULL) {
 133         errno = ENOMEM;
 134         return -1;
 135     }
 136     memset((char *) asyncIoTable, 0,
 137            asyncIoTableSize * sizeof(AioInfo));
 138
 139     FD_ZERO(&readFdSet);
 140     FD_ZERO(&writeFdSet);
 141     FD_ZERO(&readFdSetPost);
 142     FD_ZERO(&writeFdSetPost);
 143     libInitialized = TRUE;
 144     return 0;
 145 }
 146
 147 \f
 148 /*
 149  *--------------------------------------------------------------
 150  *
 151  * OS_LibShutdown --
 152  *
 153  *      Shutdown the OS library.
 154  *
 155  * Results:
 156  *      None.
 157  *
 158  * Side effects:
 159  *      Memory freed, fds closed.
 160  *
 161  *--------------------------------------------------------------
 162  */
 163 void OS_LibShutdown()
 164 {
 165     if(!libInitialized)
 166         return;
 167
 168     free(asyncIoTable);
 169     asyncIoTable = NULL;
 170     libInitialized = FALSE;
 171     return;
 172 }
 173
 174 \f
 175 /*
 176  *----------------------------------------------------------------------
 177  *
 178  * OS_BuildSockAddrUn --
 179  *
 180  *      Using the pathname bindPath, fill in the sockaddr_un structure
 181  *      *servAddrPtr and the length of this structure *servAddrLen.
 182  *
 183  *      The format of the sockaddr_un structure changed incompatibly in
 184  *      4.3BSD Reno.  Digital UNIX supports both formats, other systems
 185  *      support one or the other.
 186  *
 187  * Results:
 188  *      0 for normal return, -1 for failure (bindPath too long).
 189  *
 190  *----------------------------------------------------------------------
 191  */
 192
 193 static int OS_BuildSockAddrUn(const char *bindPath,
 194                               struct sockaddr_un *servAddrPtr,
 195                               int *servAddrLen)
 196 {
 197     int bindPathLen = strlen(bindPath);
 198
 199 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 200     if(bindPathLen >= sizeof(servAddrPtr->sun_path)) {
 201         return -1;
 202     }
 203 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 204     if(bindPathLen > sizeof(servAddrPtr->sun_path)) {
 205         return -1;
 206     }
 207 #endif
 208     memset((char *) servAddrPtr, 0, sizeof(*servAddrPtr));
 209     servAddrPtr->sun_family = AF_UNIX;
 210     memcpy(servAddrPtr->sun_path, bindPath, bindPathLen);
 211 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 212     *servAddrLen = sizeof(servAddrPtr->sun_len)
 213             + sizeof(servAddrPtr->sun_family)
 214             + bindPathLen + 1;
 215     servAddrPtr->sun_len = *servAddrLen;
 216 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 217     *servAddrLen = sizeof(servAddrPtr->sun_family) + bindPathLen;
 218 #endif
 219     return 0;
 220 }
 221 \f
 222 union SockAddrUnion {
 223     struct  sockaddr_un unixVariant;
 224     struct  sockaddr_in inetVariant;
 225 };
 226
 227 \f
 228 /*
 229  * OS_CreateLocalIpcFd --
 230  *
 231  *   This procedure is responsible for creating the listener socket
 232  *   on Unix for local process communication.  It will create a
 233  *   domain socket or a TCP/IP socket bound to "localhost" and return
 234  *   a file descriptor to it to the caller.
 235  *
 236  * Results:
 237  *      Listener socket created.  This call returns either a valid
 238  *      file descriptor or -1 on error.
 239  *
 240  * Side effects:
 241  *      None.
 242  *
 243  *----------------------------------------------------------------------
 244  */
 245 int OS_CreateLocalIpcFd(const char *bindPath, int backlog)
 246 {
 247     int listenSock, servLen;
 248     union   SockAddrUnion sa;
 249     int     tcp = FALSE;
 250     char    *tp;
 251     short   port;
 252     char    host[MAXPATHLEN];
 253
 254     strcpy(host, bindPath);
 255     if((tp = strchr(host, ':')) != 0) {
 256         *tp++ = 0;
 257         if((port = atoi(tp)) == 0) {
 258             *--tp = ':';
 259          } else {
 260             tcp = TRUE;
 261          }
 262     }
 263     if(tcp && (*host && strcmp(host, "localhost") != 0)) {
 264         fprintf(stderr, "To start a service on a TCP port can not "
 265                         "specify a host name.\n"
 266                         "You should either use \"localhost:<port>\" or "
 267                         " just use \":<port>.\"\n");
 268         exit(1);
 269     }
 270
 271     if(tcp) {
 272         listenSock = socket(AF_INET, SOCK_STREAM, 0);
 273         if(listenSock >= 0) {
 274             int flag = 1;
 275             if(setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
 276                           (char *) &flag, sizeof(flag)) < 0) {
 277                 fprintf(stderr, "Can't set SO_REUSEADDR.\n");
 278                 exit(1001);
 279             }
 280         }
 281     } else {
 282         listenSock = socket(AF_UNIX, SOCK_STREAM, 0);
 283     }
 284     if(listenSock < 0) {
 285         return -1;
 286     }
 287
 288     /*
 289      * Bind the listening socket.
 290      */
 291     if(tcp) {
 292         memset((char *) &sa.inetVariant, 0, sizeof(sa.inetVariant));
 293         sa.inetVariant.sin_family = AF_INET;
 294         sa.inetVariant.sin_addr.s_addr = htonl(INADDR_ANY);
 295         sa.inetVariant.sin_port = htons(port);
 296         servLen = sizeof(sa.inetVariant);
 297     } else {
 298         unlink(bindPath);
 299         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 300             fprintf(stderr, "Listening socket's path name is too long.\n");
 301             exit(1000);
 302         }
 303     }
 304     if(bind(listenSock, (struct sockaddr *) &sa.unixVariant, servLen) < 0
 305        || listen(listenSock, backlog) < 0) {
 306         perror("bind/listen");
 307         exit(errno);
 308     }
 309
 310     return listenSock;
 311 }
 312
 313 \f
 314 /*
 315  *----------------------------------------------------------------------
 316  *
 317  * OS_FcgiConnect --
 318  *
 319  *      Create the socket and connect to the remote application if
 320  *      possible.
 321  *
 322  *      This was lifted from the cgi-fcgi application and was abstracted
 323  *      out because Windows NT does not have a domain socket and must
 324  *      use a named pipe which has a different API altogether.
 325  *
 326  * Results:
 327  *      -1 if fail or a valid file descriptor if connection succeeds.
 328  *
 329  * Side effects:
 330  *      Remote connection established.
 331  *
 332  *----------------------------------------------------------------------
 333  */
 334 int OS_FcgiConnect(char *bindPath)
 335 {
 336     union   SockAddrUnion sa;
 337     int servLen, resultSock;
 338     int connectStatus;
 339     char    *tp;
 340     char    host[MAXPATHLEN];
 341     short   port;
 342     int     tcp = FALSE;
 343
 344     strcpy(host, bindPath);
 345     if((tp = strchr(host, ':')) != 0) {
 346         *tp++ = 0;
 347         if((port = atoi(tp)) == 0) {
 348             *--tp = ':';
 349          } else {
 350             tcp = TRUE;
 351          }
 352     }
 353     if(tcp == TRUE) {
 354         struct  hostent *hp;
 355         if((hp = gethostbyname((*host ? host : "localhost"))) == NULL) {
 356             fprintf(stderr, "Unknown host: %s\n", bindPath);
 357             exit(1000);
 358         }
 359         sa.inetVariant.sin_family = AF_INET;
 360         memcpy(&sa.inetVariant.sin_addr, hp->h_addr, hp->h_length);
 361         sa.inetVariant.sin_port = htons(port);
 362         servLen = sizeof(sa.inetVariant);
 363         resultSock = socket(AF_INET, SOCK_STREAM, 0);
 364     } else {
 365         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 366             fprintf(stderr, "Listening socket's path name is too long.\n");
 367             exit(1000);
 368         }
 369         resultSock = socket(AF_UNIX, SOCK_STREAM, 0);
 370     }
 371
 372     assert(resultSock >= 0);
 373     connectStatus = connect(resultSock, (struct sockaddr *) &sa.unixVariant,
 374                              servLen);
 375     if(connectStatus >= 0) {
 376         return resultSock;
 377     } else {
 378         /*
 379          * Most likely (errno == ENOENT || errno == ECONNREFUSED)
 380          * and no FCGI application server is running.
 381          */
 382         close(resultSock);
 383         return -1;
 384     }
 385 }
 386
 387 \f
 388 /*
 389  *--------------------------------------------------------------
 390  *
 391  * OS_Read --
 392  *
 393  *      Pass through to the unix read function.
 394  *
 395  * Results:
 396  *      Returns number of byes read, 0, or -1 failure: errno
 397  *      contains actual error.
 398  *
 399  * Side effects:
 400  *      None.
 401  *
 402  *--------------------------------------------------------------
 403  */
 404 int OS_Read(int fd, char * buf, size_t len)
 405 {
 406     return(read(fd, buf, len));
 407 }
 408 \f
 409 /*
 410  *--------------------------------------------------------------
 411  *
 412  * OS_Write --
 413  *
 414  *      Pass through to unix write function.
 415  *
 416  * Results:
 417  *      Returns number of byes read, 0, or -1 failure: errno
 418  *      contains actual error.
 419  *
 420  * Side effects:
 421  *      none.
 422  *
 423  *--------------------------------------------------------------
 424  */
 425 int OS_Write(int fd, char * buf, size_t len)
 426 {
 427     return(write(fd, buf, len));
 428 }
 429
 430 \f
 431 /*
 432  *----------------------------------------------------------------------
 433  *
 434  * OS_SpawnChild --
 435  *
 436  *      Spawns a new FastCGI listener process.
 437  *
 438  * Results:
 439  *      0 if success, -1 if error.
 440  *
 441  * Side effects:
 442  *      Child process spawned.
 443  *
 444  *----------------------------------------------------------------------
 445  */
 446 int OS_SpawnChild(char *appPath, int listenFd)
 447 {
 448     int forkResult;
 449
 450     forkResult = fork();
 451     if(forkResult < 0) {
 452         exit(errno);
 453     }
 454
 455     if(forkResult == 0) {
 456         /*
 457          * Close STDIN unconditionally.  It's used by the parent
 458          * process for CGI communication.  The FastCGI applciation
 459          * will be replacing this with the FastCGI listenFd IF
 460          * STDIN_FILENO is the same as FCGI_LISTENSOCK_FILENO
 461          * (which it is on Unix).  Regardless, STDIN, STDOUT, and
 462          * STDERR will be closed as the FastCGI process uses a
 463          * multiplexed socket in their place.
 464          */
 465         close(STDIN_FILENO);
 466
 467         /*
 468          * If the listenFd is already the value of FCGI_LISTENSOCK_FILENO
 469          * we're set.  If not, change it so the child knows where to
 470          * get the listen socket from.
 471          */
 472         if(listenFd != FCGI_LISTENSOCK_FILENO) {
 473             dup2(listenFd, FCGI_LISTENSOCK_FILENO);
 474             close(listenFd);
 475         }
 476
 477         close(STDOUT_FILENO);
 478         close(STDERR_FILENO);
 479
 480         /*
 481          * We're a child.  Exec the application.
 482          *
 483          * XXX: entire environment passes through
 484          */
 485         execl(appPath, appPath, NULL);
 486         /*
 487          * XXX: Can't do this as we've already closed STDERR!!!
 488          *
 489          * perror("exec");
 490          */
 491         exit(errno);
 492     }
 493     return 0;
 494 }
 495
 496 \f
 497 /*
 498  *--------------------------------------------------------------
 499  *
 500  * OS_AsyncReadStdin --
 501  *
 502  *      This initiates an asynchronous read on the standard
 503  *      input handle.
 504  *
 505  *      The abstraction is necessary because Windows NT does not
 506  *      have a clean way of "select"ing a file descriptor for
 507  *      I/O.
 508  *
 509  * Results:
 510  *      -1 if error, 0 otherwise.
 511  *
 512  * Side effects:
 513  *      Asynchronous bit is set in the readfd variable and
 514  *      request is enqueued.
 515  *
 516  *--------------------------------------------------------------
 517  */
 518 int OS_AsyncReadStdin(void *buf, int len, OS_AsyncProc procPtr,
 519                       ClientData clientData)
 520 {
 521     int index = AIO_RD_IX(STDIN_FILENO);
 522
 523     ASSERT(asyncIoTable[index].inUse == 0);
 524     asyncIoTable[index].procPtr = procPtr;
 525     asyncIoTable[index].clientData = clientData;
 526     asyncIoTable[index].fd = STDIN_FILENO;
 527     asyncIoTable[index].len = len;
 528     asyncIoTable[index].offset = 0;
 529     asyncIoTable[index].buf = buf;
 530     asyncIoTable[index].inUse = 1;
 531     FD_SET(STDIN_FILENO, &readFdSet);
 532     if(STDIN_FILENO > maxFd)
 533         maxFd = STDIN_FILENO;
 534     return 0;
 535 }
 536
 537 static void GrowAsyncTable(void)
 538 {
 539     int oldTableSize = asyncIoTableSize;
 540
 541     asyncIoTableSize = asyncIoTableSize * 2;
 542     asyncIoTable = (AioInfo *)realloc(asyncIoTable, asyncIoTableSize * sizeof(AioInfo));
 543     if(asyncIoTable == NULL) {
 544         errno = ENOMEM;
 545         exit(errno);
 546     }
 547     memset((char *) &asyncIoTable[oldTableSize], 0,
 548            oldTableSize * sizeof(AioInfo));
 549
 550 }
 551
 552 \f
 553 /*
 554  *--------------------------------------------------------------
 555  *
 556  * OS_AsyncRead --
 557  *
 558  *      This initiates an asynchronous read on the file
 559  *      handle which may be a socket or named pipe.
 560  *
 561  *      We also must save the ProcPtr and ClientData, so later
 562  *      when the io completes, we know who to call.
 563  *
 564  *      We don't look at any results here (the ReadFile may
 565  *      return data if it is cached) but do all completion
 566  *      processing in OS_Select when we get the io completion
 567  *      port done notifications.  Then we call the callback.
 568  *
 569  * Results:
 570  *      -1 if error, 0 otherwise.
 571  *
 572  * Side effects:
 573  *      Asynchronous I/O operation is queued for completion.
 574  *
 575  *--------------------------------------------------------------
 576  */
 577 int OS_AsyncRead(int fd, int offset, void *buf, int len,
 578                  OS_AsyncProc procPtr, ClientData clientData)
 579 {
 580     int index = AIO_RD_IX(fd);
 581
 582     ASSERT(asyncIoTable != NULL);
 583
 584     if(fd > maxFd)
 585         maxFd = fd;
 586
 587     if(index >= asyncIoTableSize) {
 588         GrowAsyncTable();
 589     }
 590
 591     ASSERT(asyncIoTable[index].inUse == 0);
 592     asyncIoTable[index].procPtr = procPtr;
 593     asyncIoTable[index].clientData = clientData;
 594     asyncIoTable[index].fd = fd;
 595     asyncIoTable[index].len = len;
 596     asyncIoTable[index].offset = offset;
 597     asyncIoTable[index].buf = buf;
 598     asyncIoTable[index].inUse = 1;
 599     FD_SET(fd, &readFdSet);
 600     return 0;
 601 }
 602 \f
 603 /*
 604  *--------------------------------------------------------------
 605  *
 606  * OS_AsyncWrite --
 607  *
 608  *      This initiates an asynchronous write on the "fake" file
 609  *      descriptor (which may be a file, socket, or named pipe).
 610  *      We also must save the ProcPtr and ClientData, so later
 611  *      when the io completes, we know who to call.
 612  *
 613  *      We don't look at any results here (the WriteFile generally
 614  *      completes immediately) but do all completion processing
 615  *      in OS_DoIo when we get the io completion port done
 616  *      notifications.  Then we call the callback.
 617  *
 618  * Results:
 619  *      -1 if error, 0 otherwise.
 620  *
 621  * Side effects:
 622  *      Asynchronous I/O operation is queued for completion.
 623  *
 624  *--------------------------------------------------------------
 625  */
 626 int OS_AsyncWrite(int fd, int offset, void *buf, int len,
 627                   OS_AsyncProc procPtr, ClientData clientData)
 628 {
 629     int index = AIO_WR_IX(fd);
 630
 631     if(fd > maxFd)
 632         maxFd = fd;
 633
 634     if(index >= asyncIoTableSize) {
 635         GrowAsyncTable();
 636     }
 637
 638     ASSERT(asyncIoTable[index].inUse == 0);
 639     asyncIoTable[index].procPtr = procPtr;
 640     asyncIoTable[index].clientData = clientData;
 641     asyncIoTable[index].fd = fd;
 642     asyncIoTable[index].len = len;
 643     asyncIoTable[index].offset = offset;
 644     asyncIoTable[index].buf = buf;
 645     asyncIoTable[index].inUse = 1;
 646     FD_SET(fd, &writeFdSet);
 647     return 0;
 648 }
 649 \f
 650 /*
 651  *--------------------------------------------------------------
 652  *
 653  * OS_Close --
 654  *
 655  *      Closes the descriptor.  This is a pass through to the
 656  *      Unix close.
 657  *
 658  * Results:
 659  *      0 for success, -1 on failure
 660  *
 661  * Side effects:
 662  *      None.
 663  *
 664  *--------------------------------------------------------------
 665  */
 666 int OS_Close(int fd)
 667 {
 668     int index = AIO_RD_IX(fd);
 669
 670     FD_CLR(fd, &readFdSet);
 671     FD_CLR(fd, &readFdSetPost);
 672     if(asyncIoTable[index].inUse != 0) {
 673         asyncIoTable[index].inUse = 0;
 674     }
 675
 676     FD_CLR(fd, &writeFdSet);
 677     FD_CLR(fd, &writeFdSetPost);
 678     index = AIO_WR_IX(fd);
 679     if(asyncIoTable[index].inUse != 0) {
 680         asyncIoTable[index].inUse = 0;
 681     }
 682     if(maxFd == fd)
 683         maxFd--;
 684     return close(fd);
 685 }
 686 \f
 687 /*
 688  *--------------------------------------------------------------
 689  *
 690  * OS_CloseRead --
 691  *
 692  *      Cancel outstanding asynchronous reads and prevent subsequent
 693  *      reads from completing.
 694  *
 695  * Results:
 696  *      Socket or file is shutdown. Return values mimic Unix shutdown:
 697  *              0 success, -1 failure
 698  *
 699  *--------------------------------------------------------------
 700  */
 701 int OS_CloseRead(int fd)
 702 {
 703     if(asyncIoTable[AIO_RD_IX(fd)].inUse != 0) {
 704         asyncIoTable[AIO_RD_IX(fd)].inUse = 0;
 705         FD_CLR(fd, &readFdSet);
 706     }
 707
 708     return shutdown(fd, 0);
 709 }
 710
 711 \f
 712 /*
 713  *--------------------------------------------------------------
 714  *
 715  * OS_DoIo --
 716  *
 717  *      This function was formerly OS_Select.  It's purpose is
 718  *      to pull I/O completion events off the queue and dispatch
 719  *      them to the appropriate place.
 720  *
 721  * Results:
 722  *      Returns 0.
 723  *
 724  * Side effects:
 725  *      Handlers are called.
 726  *
 727  *--------------------------------------------------------------
 728  */
 729 int OS_DoIo(struct timeval *tmo)
 730 {
 731     int fd, len, selectStatus;
 732     OS_AsyncProc procPtr;
 733     ClientData clientData;
 734     AioInfo *aioPtr;
 735     fd_set readFdSetCpy;
 736     fd_set writeFdSetCpy;
 737
 738     FD_ZERO(&readFdSetCpy);
 739     FD_ZERO(&writeFdSetCpy);
 740
 741     for(fd = 0; fd <= maxFd; fd++) {
 742         if(FD_ISSET(fd, &readFdSet)) {
 743             FD_SET(fd, &readFdSetCpy);
 744         }
 745         if(FD_ISSET(fd, &writeFdSet)) {
 746             FD_SET(fd, &writeFdSetCpy);
 747         }
 748     }
 749
 750     /*
 751      * If there were no completed events from a prior call, see if there's
 752      * any work to do.
 753      */
 754     if(numRdPosted == 0 && numWrPosted == 0) {
 755         selectStatus = select((maxFd+1), &readFdSetCpy, &writeFdSetCpy,
 756                               NULL, tmo);
 757         if(selectStatus < 0) {
 758             exit(errno);
 759         }
 760
 761         for(fd = 0; fd <= maxFd; fd++) {
 762             /*
 763              * Build up a list of completed events.  We'll work off of
 764              * this list as opposed to looping through the read and write
 765              * fd sets since they can be affected by a callbacl routine.
 766              */
 767             if(FD_ISSET(fd, &readFdSetCpy)) {
 768                 numRdPosted++;
 769                 FD_SET(fd, &readFdSetPost);
 770                 FD_CLR(fd, &readFdSet);
 771             }
 772
 773             if(FD_ISSET(fd, &writeFdSetCpy)) {
 774                 numWrPosted++;
 775                 FD_SET(fd, &writeFdSetPost);
 776                 FD_CLR(fd, &writeFdSet);
 777             }
 778         }
 779     }
 780
 781     if(numRdPosted == 0 && numWrPosted == 0)
 782         return 0;
 783
 784     for(fd = 0; fd <= maxFd; fd++) {
 785         /*
 786          * Do reads and dispatch callback.
 787          */
 788         if(FD_ISSET(fd, &readFdSetPost)
 789            && asyncIoTable[AIO_RD_IX(fd)].inUse) {
 790
 791             numRdPosted--;
 792             FD_CLR(fd, &readFdSetPost);
 793             aioPtr = &asyncIoTable[AIO_RD_IX(fd)];
 794
 795             len = read(aioPtr->fd, aioPtr->buf, aioPtr->len);
 796
 797             procPtr = aioPtr->procPtr;
 798             aioPtr->procPtr = NULL;
 799             clientData = aioPtr->clientData;
 800             aioPtr->inUse = 0;
 801
 802             (*procPtr)(clientData, len);
 803         }
 804
 805         /*
 806          * Do writes and dispatch callback.
 807          */
 808         if(FD_ISSET(fd, &writeFdSetPost) &&
 809            asyncIoTable[AIO_WR_IX(fd)].inUse) {
 810
 811             numWrPosted--;
 812             FD_CLR(fd, &writeFdSetPost);
 813             aioPtr = &asyncIoTable[AIO_WR_IX(fd)];
 814
 815             len = write(aioPtr->fd, aioPtr->buf, aioPtr->len);
 816
 817             procPtr = aioPtr->procPtr;
 818             aioPtr->procPtr = NULL;
 819             clientData = aioPtr->clientData;
 820             aioPtr->inUse = 0;
 821             (*procPtr)(clientData, len);
 822         }
 823     }
 824     return 0;
 825 }
 826
 827 \f
 828 /*
 829  *----------------------------------------------------------------------
 830  *
 831  * ClientAddrOK --
 832  *
 833  *      Checks if a client address is in a list of allowed addresses
 834  *
 835  * Results:
 836  *      TRUE if address list is empty or client address is present
 837  *      in the list, FALSE otherwise.
 838  *
 839  *----------------------------------------------------------------------
 840  */
 841 static int ClientAddrOK(struct sockaddr_in *saPtr, const char *clientList)
 842 {
 843     int result = FALSE;
 844     char *clientListCopy, *cur, *next;
 845     char *newString = NULL;
 846     int strLen;
 847
 848     if(clientList == NULL || *clientList == '\0') {
 849         return TRUE;
 850     }
 851
 852     strLen = strlen(clientList);
 853     clientListCopy = (char *)malloc(strLen + 1);
 854     assert(newString != NULL);
 855     memcpy(newString, clientList, strLen);
 856     newString[strLen] = '\000';
 857
 858     for(cur = clientListCopy; cur != NULL; cur = next) {
 859         next = strchr(cur, ',');
 860         if(next != NULL) {
 861             *next++ = '\0';
 862         }
 863         if(inet_addr(cur) == saPtr->sin_addr.s_addr) {
 864             result = TRUE;
 865             break;
 866         }
 867     }
 868     free(clientListCopy);
 869     return result;
 870 }
 871
 872 \f
 873 /*
 874  *----------------------------------------------------------------------
 875  *
 876  * AcquireLock --
 877  *
 878  *      On platforms that implement concurrent calls to accept
 879  *      on a shared listening ipcFd, returns 0.  On other platforms,
 880  *      acquires an exclusive lock across all processes sharing a
 881  *      listening ipcFd, blocking until the lock has been acquired.
 882  *
 883  * Results:
 884  *      0 for successful call, -1 in case of system error (fatal).
 885  *
 886  * Side effects:
 887  *      This process now has the exclusive lock.
 888  *
 889  *----------------------------------------------------------------------
 890  */
 891 static int AcquireLock(int sock, int fail_on_intr)
 892 {
 893 #ifdef USE_LOCKING
 894     do {
 895         struct flock lock;
 896         lock.l_type = F_WRLCK;
 897         lock.l_start = 0;
 898         lock.l_whence = SEEK_SET;
 899         lock.l_len = 0;
 900
 901         if (fcntl(sock, F_SETLKW, &lock) != -1)
 902             return 0;
 903     } while (errno == EINTR && !fail_on_intr);
 904
 905     return -1;
 906
 907 #else
 908     return 0;
 909 #endif
 910 }
 911 \f
 912 /*
 913  *----------------------------------------------------------------------
 914  *
 915  * ReleaseLock --
 916  *
 917  *      On platforms that implement concurrent calls to accept
 918  *      on a shared listening ipcFd, does nothing.  On other platforms,
 919  *      releases an exclusive lock acquired by AcquireLock.
 920  *
 921  * Results:
 922  *      0 for successful call, -1 in case of system error (fatal).
 923  *
 924  * Side effects:
 925  *      This process no longer holds the lock.
 926  *
 927  *----------------------------------------------------------------------
 928  */
 929 static int ReleaseLock(int sock)
 930 {
 931 #ifdef USE_LOCKING
 932     do {
 933         struct flock lock;
 934         lock.l_type = F_UNLCK;
 935         lock.l_start = 0;
 936         lock.l_whence = SEEK_SET;
 937         lock.l_len = 0;
 938
 939         if (fcntl(sock, F_SETLK, &lock) != -1)
 940             return 0;
 941     } while (errno == EINTR);
 942
 943     return -1;
 944
 945 #else
 946     return 0;
 947 #endif
 948 }
 949
 950 \f
 951 /**********************************************************************
 952  * Determine if the errno resulting from a failed accept() warrants a
 953  * retry or exit().  Based on Apache's http_main.c accept() handling
 954  * and Stevens' Unix Network Programming Vol 1, 2nd Ed, para. 15.6.
 955  */
 956 static int is_reasonable_accept_errno (const int error)
 957 {
 958     switch (error) {
 959 #ifdef EPROTO
 960         /* EPROTO on certain older kernels really means ECONNABORTED, so
 961          * we need to ignore it for them.  See discussion in new-httpd
 962          * archives nh.9701 search for EPROTO.  Also see nh.9603, search
 963          * for EPROTO:  There is potentially a bug in Solaris 2.x x<6, and
 964          * other boxes that implement tcp sockets in userland (i.e. on top of
 965          * STREAMS).  On these systems, EPROTO can actually result in a fatal
 966          * loop.  See PR#981 for example.  It's hard to handle both uses of
 967          * EPROTO. */
 968         case EPROTO:
 969 #endif
 970 #ifdef ECONNABORTED
 971         case ECONNABORTED:
 972 #endif
 973         /* Linux generates the rest of these, other tcp stacks (i.e.
 974          * bsd) tend to hide them behind getsockopt() interfaces.  They
 975          * occur when the net goes sour or the client disconnects after the
 976          * three-way handshake has been done in the kernel but before
 977          * userland has picked up the socket. */
 978 #ifdef ECONNRESET
 979         case ECONNRESET:
 980 #endif
 981 #ifdef ETIMEDOUT
 982         case ETIMEDOUT:
 983 #endif
 984 #ifdef EHOSTUNREACH
 985         case EHOSTUNREACH:
 986 #endif
 987 #ifdef ENETUNREACH
 988         case ENETUNREACH:
 989 #endif
 990             return 1;
 991
 992         default:
 993             return 0;
 994     }
 995 }
 996
 997 /**********************************************************************
 998  * This works around a problem on Linux 2.0.x and SCO Unixware (maybe
 999  * others?).  When a connect() is made to a Unix Domain socket, but its
1000  * not accept()ed before the web server gets impatient and close()s, an
1001  * accept() results in a valid file descriptor, but no data to read.
1002  * This causes a block on the first read() - which never returns!
1003  *
1004  * Another approach to this is to write() to the socket to provoke a
1005  * SIGPIPE, but this is a pain because of the FastCGI protocol, the fact
1006  * that whatever is written has to be universally ignored by all FastCGI
1007  * web servers, and a SIGPIPE handler has to be installed which returns
1008  * (or SIGPIPE is ignored).
1009  *
1010  * READABLE_UNIX_FD_DROP_DEAD_TIMEVAL = 2,0 by default.
1011  *
1012  * Making it shorter is probably safe, but I'll leave that to you.  Making
1013  * it 0,0 doesn't work reliably.  The shorter you can reliably make it,
1014  * the faster your application will be able to recover (waiting 2 seconds
1015  * may _cause_ the problem when there is a very high demand). At any rate,
1016  * this is better than perma-blocking.
1017  */
1018 static int is_af_unix_keeper(const int fd)
1019 {
1020     struct timeval tval = { READABLE_UNIX_FD_DROP_DEAD_TIMEVAL };
1021     fd_set read_fds;
1022
1023     FD_ZERO(&read_fds);
1024     FD_SET(fd, &read_fds);
1025
1026     return select(fd + 1, &read_fds, NULL, NULL, &tval) >= 0 && FD_ISSET(fd, &read_fds);
1027 }
1028
/*
 *----------------------------------------------------------------------
 *
 * OS_Accept --
 *
 *      Accepts a new FastCGI connection on listen_sock.  The accept()
 *      call is bracketed by AcquireLock()/ReleaseLock() on the
 *      listening socket.
 *
 *      Connections whose peer address family is AF_INET get
 *      TCP_NODELAY set (where the option exists) and are checked with
 *      ClientAddrOK() against webServerAddrs; a connection that fails
 *      the check is closed and accept() is tried again.  Connections
 *      whose family is AF_UNIX are additionally probed with
 *      is_af_unix_keeper(); if the probe fails the connection is
 *      closed and the whole lock/accept cycle starts over.
 *
 *      listen_sock    - listening socket to accept on.
 *      fail_on_intr   - passed to AcquireLock(); when non-zero, an
 *                       accept() that fails with EINTR is not retried
 *                       by the inner loop but handled like any other
 *                       accept() failure.
 *      webServerAddrs - passed through to ClientAddrOK().
 *
 * Results:
 *      -1 if the operation fails, otherwise this is a valid IPC fd.
 *      When accept() fails with an errno rejected by
 *      is_reasonable_accept_errno(), that errno is preserved for the
 *      caller.
 *
 * Side effects:
 *      New IPC connection is accepted.
 *
 *----------------------------------------------------------------------
 */
int OS_Accept(int listen_sock, int fail_on_intr, const char *webServerAddrs)
{
    int socket;
    union {
        struct sockaddr_un un;
        struct sockaddr_in in;
    } sa;

    for (;;) {
        if (AcquireLock(listen_sock, fail_on_intr))
            return -1;

        for (;;) {
            /* Retry accept() on EINTR unless the caller asked not to. */
            do {
#ifdef HAVE_SOCKLEN
                socklen_t len = sizeof(sa);
#else
                int len = sizeof(sa);
#endif
                socket = accept(listen_sock, (struct sockaddr *)&sa, &len);
            } while (socket < 0 && errno == EINTR && !fail_on_intr);

            if (socket < 0) {
                if (!is_reasonable_accept_errno(errno)) {
                    /* Keep accept()'s errno visible to the caller even
                     * if releasing the lock changes it. */
                    int errnoSave = errno;
                    ReleaseLock(listen_sock);
                    errno = errnoSave;
                    return (-1);
                }
                /* Tolerable failure: clear errno and accept again. */
                errno = 0;
            }
            else {  /* socket >= 0 */
#ifdef TCP_NODELAY
                int set = 1;
#endif

                /* Only AF_INET peers need the checks below. */
                if (sa.in.sin_family != AF_INET)
                    break;

#ifdef TCP_NODELAY
                /* No replies to outgoing data, so disable Nagle */
                setsockopt(socket, IPPROTO_TCP, TCP_NODELAY, (char *)&set, sizeof(set));
#endif

                /* Check that the client IP address is approved */
                if (ClientAddrOK(&sa.in, webServerAddrs))
                    break;

                /* Unapproved client: drop it and accept again. */
                close(socket);
            }  /* socket >= 0 */
        }  /* for(;;) */

        if (ReleaseLock(listen_sock)) {
            /* The connection was already accepted; close it so the
             * descriptor is not leaked on this error path. */
            int errnoSave = errno;
            close(socket);
            errno = errnoSave;
            return (-1);
        }

        if (sa.in.sin_family != AF_UNIX || is_af_unix_keeper(socket))
            break;

        /* AF_UNIX connection that never became readable: discard it
         * and go back to waiting for the next one. */
        close(socket);
    }  /* while(1) - lock */

    return (socket);
}
1106 \f
/*
 *----------------------------------------------------------------------
 *
 * OS_IpcClose
 *
 *      OS IPC routine to close an IPC connection.  This is a thin
 *      wrapper that hands the descriptor to OS_Close().
 *
 * Results:
 *      Whatever OS_Close() returns for ipcFd.
 *
 * Side effects:
 *      IPC connection is closed.
 *
 *----------------------------------------------------------------------
 */
int OS_IpcClose(int ipcFd)
{
    int status = OS_Close(ipcFd);

    return status;
}
1126
1127 \f
1128 /*
1129  *----------------------------------------------------------------------
1130  *
1131  * OS_IsFcgi --
1132  *
1133  *      Determines whether this process is a FastCGI process or not.
1134  *
1135  * Results:
1136  *      Returns 1 if FastCGI, 0 if not.
1137  *
1138  * Side effects:
1139  *      None.
1140  *
1141  *----------------------------------------------------------------------
1142  */
1143 int OS_IsFcgi(int sock)
1144 {
1145         union {
1146         struct sockaddr_in in;
1147         struct sockaddr_un un;
1148     } sa;
1149 #ifdef HAVE_SOCKLEN
1150     socklen_t len = sizeof(sa);
1151 #else
1152     int len = sizeof(sa);
1153 #endif
1154
1155     if (getpeername(sock, (struct sockaddr *)&sa, &len) != 0 && errno == ENOTCONN) {
1156         return TRUE;
1157     }
1158     else {
1159         return FALSE;
1160     }
1161 }
1162 \f
/*
 *----------------------------------------------------------------------
 *
 * OS_SetFlags --
 *
 *      Sets selected flag bits in an open file descriptor.  The
 *      current file status flags are read with F_GETFL, OR-ed with
 *      flags, and written back with F_SETFL, so flags that are already
 *      set stay set.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      If either fcntl() call fails, the process exits with errno as
 *      its exit status.
 *
 *----------------------------------------------------------------------
 */
void OS_SetFlags(int fd, int flags)
{
    int current = fcntl(fd, F_GETFL, 0);

    if (current < 0) {
        exit(errno);
    }

    if (fcntl(fd, F_SETFL, current | flags) < 0) {
        exit(errno);
    }
}