libfcgi/os_unix.c

   1 /*
   2  * os_unix.c --
   3  *
   4  *      Description of file.
   5  *
   6  *
   7  *  Copyright (c) 1995 Open Market, Inc.
   8  *  All rights reserved.
   9  *
  10  *  This file contains proprietary and confidential information and
  11  *  remains the unpublished property of Open Market, Inc. Use,
  12  *  disclosure, or reproduction is prohibited except as permitted by
  13  *  express written license agreement with Open Market, Inc.
  14  *
  15  *  Bill Snapper
  16  *  snapper@openmarket.com
  17  */
  18
  19 #ifndef lint
  20 static const char rcsid[] = "$Id: os_unix.c,v 1.22 2001/03/27 13:18:08 robs Exp $";
  21 #endif /* not lint */
  22
  23 #include "fcgi_config.h"
  24
  25 #include <sys/types.h>
  26
  27 #ifdef HAVE_NETINET_IN_H
  28 #include <netinet/in.h>
  29 #endif
  30
  31 #include <arpa/inet.h>
  32 #include <assert.h>
  33 #include <errno.h>
  34 #include <fcntl.h>      /* for fcntl */
  35 #include <math.h>
  36 #include <memory.h>     /* for memchr() */
  37 #include <netinet/tcp.h>
  38 #include <stdarg.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42 #include <sys/time.h>
  43 #include <sys/un.h>
  44
  45 #ifdef HAVE_NETDB_H
  46 #include <netdb.h>
  47 #endif
  48
  49 #ifdef HAVE_SYS_SOCKET_H
  50 #include <sys/socket.h> /* for getpeername */
  51 #endif
  52
  53 #ifdef HAVE_UNISTD_H
  54 #include <unistd.h>
  55 #endif
  56
  57 #include "fastcgi.h"
  58 #include "fcgiapp.h"
  59 #include "fcgiappmisc.h"
  60 #include "fcgimisc.h"
  61 #include "fcgios.h"
  62
  63 #ifndef FALSE
  64 #define FALSE 0
  65 #endif
  66
  67 #ifndef TRUE
  68 #define TRUE 1
  69 #endif
  70
  71 /*
  72  * This structure holds an entry for each oustanding async I/O operation.
  73  */
  74 typedef struct {
  75     OS_AsyncProc procPtr;           /* callout completion procedure */
  76     ClientData clientData;          /* caller private data */
  77     int fd;
  78     int len;
  79     int offset;
  80     void *buf;
  81     int inUse;
  82 } AioInfo;
  83
  84 /*
  85  * Entries in the async I/O table are allocated 2 per file descriptor.
  86  *
  87  * Read Entry Index  = fd * 2
  88  * Write Entry Index = (fd * 2) + 1
  89  */
  90 #define AIO_RD_IX(fd) (fd * 2)
  91 #define AIO_WR_IX(fd) ((fd * 2) + 1)
  92
  93 static int asyncIoInUse = FALSE;
  94 static int asyncIoTableSize = 16;
  95 static AioInfo *asyncIoTable = NULL;
  96
  97 static int libInitialized = FALSE;
  98
  99 static fd_set readFdSet;
 100 static fd_set writeFdSet;
 101
 102 static fd_set readFdSetPost;
 103 static int numRdPosted = 0;
 104 static fd_set writeFdSetPost;
 105 static int numWrPosted = 0;
 106 static int volatile maxFd = -1;
 107
 108 \f
 109 /*
 110  *--------------------------------------------------------------
 111  *
 112  * OS_LibInit --
 113  *
 114  *      Set up the OS library for use.
 115  *
 116  *      NOTE: This function is really only needed for application
 117  *            asynchronous I/O.  It will most likely change in the
 118  *            future to setup the multi-threaded environment.
 119  *
 120  * Results:
 121  *      Returns 0 if success, -1 if not.
 122  *
 123  * Side effects:
 124  *      Async I/O table allocated and initialized.
 125  *
 126  *--------------------------------------------------------------
 127  */
 128 int OS_LibInit(int stdioFds[3])
 129 {
 130     if(libInitialized)
 131         return 0;
 132
 133     asyncIoTable = (AioInfo *)malloc(asyncIoTableSize * sizeof(AioInfo));
 134     if(asyncIoTable == NULL) {
 135         errno = ENOMEM;
 136         return -1;
 137     }
 138     memset((char *) asyncIoTable, 0,
 139            asyncIoTableSize * sizeof(AioInfo));
 140
 141     FD_ZERO(&readFdSet);
 142     FD_ZERO(&writeFdSet);
 143     FD_ZERO(&readFdSetPost);
 144     FD_ZERO(&writeFdSetPost);
 145     libInitialized = TRUE;
 146     return 0;
 147 }
 148
 149 \f
 150 /*
 151  *--------------------------------------------------------------
 152  *
 153  * OS_LibShutdown --
 154  *
 155  *      Shutdown the OS library.
 156  *
 157  * Results:
 158  *      None.
 159  *
 160  * Side effects:
 161  *      Memory freed, fds closed.
 162  *
 163  *--------------------------------------------------------------
 164  */
 165 void OS_LibShutdown()
 166 {
 167     if(!libInitialized)
 168         return;
 169
 170     free(asyncIoTable);
 171     asyncIoTable = NULL;
 172     libInitialized = FALSE;
 173     return;
 174 }
 175
 176 \f
 177 /*
 178  *----------------------------------------------------------------------
 179  *
 180  * OS_BuildSockAddrUn --
 181  *
 182  *      Using the pathname bindPath, fill in the sockaddr_un structure
 183  *      *servAddrPtr and the length of this structure *servAddrLen.
 184  *
 185  *      The format of the sockaddr_un structure changed incompatibly in
 186  *      4.3BSD Reno.  Digital UNIX supports both formats, other systems
 187  *      support one or the other.
 188  *
 189  * Results:
 190  *      0 for normal return, -1 for failure (bindPath too long).
 191  *
 192  *----------------------------------------------------------------------
 193  */
 194
 195 static int OS_BuildSockAddrUn(const char *bindPath,
 196                               struct sockaddr_un *servAddrPtr,
 197                               int *servAddrLen)
 198 {
 199     int bindPathLen = strlen(bindPath);
 200
 201 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 202     if(bindPathLen >= sizeof(servAddrPtr->sun_path)) {
 203         return -1;
 204     }
 205 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 206     if(bindPathLen > sizeof(servAddrPtr->sun_path)) {
 207         return -1;
 208     }
 209 #endif
 210     memset((char *) servAddrPtr, 0, sizeof(*servAddrPtr));
 211     servAddrPtr->sun_family = AF_UNIX;
 212     memcpy(servAddrPtr->sun_path, bindPath, bindPathLen);
 213 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 214     *servAddrLen = sizeof(servAddrPtr->sun_len)
 215             + sizeof(servAddrPtr->sun_family)
 216             + bindPathLen + 1;
 217     servAddrPtr->sun_len = *servAddrLen;
 218 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 219     *servAddrLen = sizeof(servAddrPtr->sun_family) + bindPathLen;
 220 #endif
 221     return 0;
 222 }
 223 \f
 224 union SockAddrUnion {
 225     struct  sockaddr_un unixVariant;
 226     struct  sockaddr_in inetVariant;
 227 };
 228
 229 \f
 230 /*
 231  * OS_CreateLocalIpcFd --
 232  *
 233  *   This procedure is responsible for creating the listener socket
 234  *   on Unix for local process communication.  It will create a
 235  *   domain socket or a TCP/IP socket bound to "localhost" and return
 236  *   a file descriptor to it to the caller.
 237  *
 238  * Results:
 239  *      Listener socket created.  This call returns either a valid
 240  *      file descriptor or -1 on error.
 241  *
 242  * Side effects:
 243  *      None.
 244  *
 245  *----------------------------------------------------------------------
 246  */
 247 int OS_CreateLocalIpcFd(const char *bindPath, int backlog)
 248 {
 249     int listenSock, servLen;
 250     union   SockAddrUnion sa;
 251     int     tcp = FALSE;
 252     char    *tp;
 253     short   port;
 254     char    host[MAXPATHLEN];
 255
 256     strcpy(host, bindPath);
 257     if((tp = strchr(host, ':')) != 0) {
 258         *tp++ = 0;
 259         if((port = atoi(tp)) == 0) {
 260             *--tp = ':';
 261          } else {
 262             tcp = TRUE;
 263          }
 264     }
 265     if(tcp && (*host && strcmp(host, "localhost") != 0)) {
 266         fprintf(stderr, "To start a service on a TCP port can not "
 267                         "specify a host name.\n"
 268                         "You should either use \"localhost:<port>\" or "
 269                         " just use \":<port>.\"\n");
 270         exit(1);
 271     }
 272
 273     if(tcp) {
 274         listenSock = socket(AF_INET, SOCK_STREAM, 0);
 275         if(listenSock >= 0) {
 276             int flag = 1;
 277             if(setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
 278                           (char *) &flag, sizeof(flag)) < 0) {
 279                 fprintf(stderr, "Can't set SO_REUSEADDR.\n");
 280                 exit(1001);
 281             }
 282         }
 283     } else {
 284         listenSock = socket(AF_UNIX, SOCK_STREAM, 0);
 285     }
 286     if(listenSock < 0) {
 287         return -1;
 288     }
 289
 290     /*
 291      * Bind the listening socket.
 292      */
 293     if(tcp) {
 294         memset((char *) &sa.inetVariant, 0, sizeof(sa.inetVariant));
 295         sa.inetVariant.sin_family = AF_INET;
 296         sa.inetVariant.sin_addr.s_addr = htonl(INADDR_ANY);
 297         sa.inetVariant.sin_port = htons(port);
 298         servLen = sizeof(sa.inetVariant);
 299     } else {
 300         unlink(bindPath);
 301         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 302             fprintf(stderr, "Listening socket's path name is too long.\n");
 303             exit(1000);
 304         }
 305     }
 306     if(bind(listenSock, (struct sockaddr *) &sa.unixVariant, servLen) < 0
 307        || listen(listenSock, backlog) < 0) {
 308         perror("bind/listen");
 309         exit(errno);
 310     }
 311
 312     return listenSock;
 313 }
 314
 315 \f
 316 /*
 317  *----------------------------------------------------------------------
 318  *
 319  * OS_FcgiConnect --
 320  *
 321  *      Create the socket and connect to the remote application if
 322  *      possible.
 323  *
 324  *      This was lifted from the cgi-fcgi application and was abstracted
 325  *      out because Windows NT does not have a domain socket and must
 326  *      use a named pipe which has a different API altogether.
 327  *
 328  * Results:
 329  *      -1 if fail or a valid file descriptor if connection succeeds.
 330  *
 331  * Side effects:
 332  *      Remote connection established.
 333  *
 334  *----------------------------------------------------------------------
 335  */
 336 int OS_FcgiConnect(char *bindPath)
 337 {
 338     union   SockAddrUnion sa;
 339     int servLen, resultSock;
 340     int connectStatus;
 341     char    *tp;
 342     char    host[MAXPATHLEN];
 343     short   port;
 344     int     tcp = FALSE;
 345
 346     strcpy(host, bindPath);
 347     if((tp = strchr(host, ':')) != 0) {
 348         *tp++ = 0;
 349         if((port = atoi(tp)) == 0) {
 350             *--tp = ':';
 351          } else {
 352             tcp = TRUE;
 353          }
 354     }
 355     if(tcp == TRUE) {
 356         struct  hostent *hp;
 357         if((hp = gethostbyname((*host ? host : "localhost"))) == NULL) {
 358             fprintf(stderr, "Unknown host: %s\n", bindPath);
 359             exit(1000);
 360         }
 361         sa.inetVariant.sin_family = AF_INET;
 362         memcpy(&sa.inetVariant.sin_addr, hp->h_addr, hp->h_length);
 363         sa.inetVariant.sin_port = htons(port);
 364         servLen = sizeof(sa.inetVariant);
 365         resultSock = socket(AF_INET, SOCK_STREAM, 0);
 366     } else {
 367         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 368             fprintf(stderr, "Listening socket's path name is too long.\n");
 369             exit(1000);
 370         }
 371         resultSock = socket(AF_UNIX, SOCK_STREAM, 0);
 372     }
 373
 374     ASSERT(resultSock >= 0);
 375     connectStatus = connect(resultSock, (struct sockaddr *) &sa.unixVariant,
 376                              servLen);
 377     if(connectStatus >= 0) {
 378         return resultSock;
 379     } else {
 380         /*
 381          * Most likely (errno == ENOENT || errno == ECONNREFUSED)
 382          * and no FCGI application server is running.
 383          */
 384         close(resultSock);
 385         return -1;
 386     }
 387 }
 388
 389 \f
 390 /*
 391  *--------------------------------------------------------------
 392  *
 393  * OS_Read --
 394  *
 395  *      Pass through to the unix read function.
 396  *
 397  * Results:
 398  *      Returns number of byes read, 0, or -1 failure: errno
 399  *      contains actual error.
 400  *
 401  * Side effects:
 402  *      None.
 403  *
 404  *--------------------------------------------------------------
 405  */
 406 int OS_Read(int fd, char * buf, size_t len)
 407 {
 408     return(read(fd, buf, len));
 409 }
 410 \f
 411 /*
 412  *--------------------------------------------------------------
 413  *
 414  * OS_Write --
 415  *
 416  *      Pass through to unix write function.
 417  *
 418  * Results:
 419  *      Returns number of byes read, 0, or -1 failure: errno
 420  *      contains actual error.
 421  *
 422  * Side effects:
 423  *      none.
 424  *
 425  *--------------------------------------------------------------
 426  */
 427 int OS_Write(int fd, char * buf, size_t len)
 428 {
 429     return(write(fd, buf, len));
 430 }
 431
 432 \f
 433 /*
 434  *----------------------------------------------------------------------
 435  *
 436  * OS_SpawnChild --
 437  *
 438  *      Spawns a new FastCGI listener process.
 439  *
 440  * Results:
 441  *      0 if success, -1 if error.
 442  *
 443  * Side effects:
 444  *      Child process spawned.
 445  *
 446  *----------------------------------------------------------------------
 447  */
 448 int OS_SpawnChild(char *appPath, int listenFd)
 449 {
 450     int forkResult;
 451
 452     forkResult = fork();
 453     if(forkResult < 0) {
 454         exit(errno);
 455     }
 456
 457     if(forkResult == 0) {
 458         /*
 459          * Close STDIN unconditionally.  It's used by the parent
 460          * process for CGI communication.  The FastCGI applciation
 461          * will be replacing this with the FastCGI listenFd IF
 462          * STDIN_FILENO is the same as FCGI_LISTENSOCK_FILENO
 463          * (which it is on Unix).  Regardless, STDIN, STDOUT, and
 464          * STDERR will be closed as the FastCGI process uses a
 465          * multiplexed socket in their place.
 466          */
 467         close(STDIN_FILENO);
 468
 469         /*
 470          * If the listenFd is already the value of FCGI_LISTENSOCK_FILENO
 471          * we're set.  If not, change it so the child knows where to
 472          * get the listen socket from.
 473          */
 474         if(listenFd != FCGI_LISTENSOCK_FILENO) {
 475             dup2(listenFd, FCGI_LISTENSOCK_FILENO);
 476             close(listenFd);
 477         }
 478
 479         close(STDOUT_FILENO);
 480         close(STDERR_FILENO);
 481
 482         /*
 483          * We're a child.  Exec the application.
 484          *
 485          * XXX: entire environment passes through
 486          */
 487         execl(appPath, appPath, NULL);
 488         /*
 489          * XXX: Can't do this as we've already closed STDERR!!!
 490          *
 491          * perror("exec");
 492          */
 493         exit(errno);
 494     }
 495     return 0;
 496 }
 497
 498 \f
 499 /*
 500  *--------------------------------------------------------------
 501  *
 502  * OS_AsyncReadStdin --
 503  *
 504  *      This initiates an asynchronous read on the standard
 505  *      input handle.
 506  *
 507  *      The abstraction is necessary because Windows NT does not
 508  *      have a clean way of "select"ing a file descriptor for
 509  *      I/O.
 510  *
 511  * Results:
 512  *      -1 if error, 0 otherwise.
 513  *
 514  * Side effects:
 515  *      Asynchronous bit is set in the readfd variable and
 516  *      request is enqueued.
 517  *
 518  *--------------------------------------------------------------
 519  */
 520 int OS_AsyncReadStdin(void *buf, int len, OS_AsyncProc procPtr,
 521                       ClientData clientData)
 522 {
 523     int index = AIO_RD_IX(STDIN_FILENO);
 524
 525     asyncIoInUse = TRUE;
 526     ASSERT(asyncIoTable[index].inUse == 0);
 527     asyncIoTable[index].procPtr = procPtr;
 528     asyncIoTable[index].clientData = clientData;
 529     asyncIoTable[index].fd = STDIN_FILENO;
 530     asyncIoTable[index].len = len;
 531     asyncIoTable[index].offset = 0;
 532     asyncIoTable[index].buf = buf;
 533     asyncIoTable[index].inUse = 1;
 534     FD_SET(STDIN_FILENO, &readFdSet);
 535     if(STDIN_FILENO > maxFd)
 536         maxFd = STDIN_FILENO;
 537     return 0;
 538 }
 539
 540 static void GrowAsyncTable(void)
 541 {
 542     int oldTableSize = asyncIoTableSize;
 543
 544     asyncIoTableSize = asyncIoTableSize * 2;
 545     asyncIoTable = (AioInfo *)realloc(asyncIoTable, asyncIoTableSize * sizeof(AioInfo));
 546     if(asyncIoTable == NULL) {
 547         errno = ENOMEM;
 548         exit(errno);
 549     }
 550     memset((char *) &asyncIoTable[oldTableSize], 0,
 551            oldTableSize * sizeof(AioInfo));
 552
 553 }
 554
 555 \f
 556 /*
 557  *--------------------------------------------------------------
 558  *
 559  * OS_AsyncRead --
 560  *
 561  *      This initiates an asynchronous read on the file
 562  *      handle which may be a socket or named pipe.
 563  *
 564  *      We also must save the ProcPtr and ClientData, so later
 565  *      when the io completes, we know who to call.
 566  *
 567  *      We don't look at any results here (the ReadFile may
 568  *      return data if it is cached) but do all completion
 569  *      processing in OS_Select when we get the io completion
 570  *      port done notifications.  Then we call the callback.
 571  *
 572  * Results:
 573  *      -1 if error, 0 otherwise.
 574  *
 575  * Side effects:
 576  *      Asynchronous I/O operation is queued for completion.
 577  *
 578  *--------------------------------------------------------------
 579  */
 580 int OS_AsyncRead(int fd, int offset, void *buf, int len,
 581                  OS_AsyncProc procPtr, ClientData clientData)
 582 {
 583     int index = AIO_RD_IX(fd);
 584
 585     ASSERT(asyncIoTable != NULL);
 586     asyncIoInUse = TRUE;
 587
 588     if(fd > maxFd)
 589         maxFd = fd;
 590
 591     if(index >= asyncIoTableSize) {
 592         GrowAsyncTable();
 593     }
 594
 595     ASSERT(asyncIoTable[index].inUse == 0);
 596     asyncIoTable[index].procPtr = procPtr;
 597     asyncIoTable[index].clientData = clientData;
 598     asyncIoTable[index].fd = fd;
 599     asyncIoTable[index].len = len;
 600     asyncIoTable[index].offset = offset;
 601     asyncIoTable[index].buf = buf;
 602     asyncIoTable[index].inUse = 1;
 603     FD_SET(fd, &readFdSet);
 604     return 0;
 605 }
 606 \f
 607 /*
 608  *--------------------------------------------------------------
 609  *
 610  * OS_AsyncWrite --
 611  *
 612  *      This initiates an asynchronous write on the "fake" file
 613  *      descriptor (which may be a file, socket, or named pipe).
 614  *      We also must save the ProcPtr and ClientData, so later
 615  *      when the io completes, we know who to call.
 616  *
 617  *      We don't look at any results here (the WriteFile generally
 618  *      completes immediately) but do all completion processing
 619  *      in OS_DoIo when we get the io completion port done
 620  *      notifications.  Then we call the callback.
 621  *
 622  * Results:
 623  *      -1 if error, 0 otherwise.
 624  *
 625  * Side effects:
 626  *      Asynchronous I/O operation is queued for completion.
 627  *
 628  *--------------------------------------------------------------
 629  */
 630 int OS_AsyncWrite(int fd, int offset, void *buf, int len,
 631                   OS_AsyncProc procPtr, ClientData clientData)
 632 {
 633     int index = AIO_WR_IX(fd);
 634
 635     asyncIoInUse = TRUE;
 636
 637     if(fd > maxFd)
 638         maxFd = fd;
 639
 640     if(index >= asyncIoTableSize) {
 641         GrowAsyncTable();
 642     }
 643
 644     ASSERT(asyncIoTable[index].inUse == 0);
 645     asyncIoTable[index].procPtr = procPtr;
 646     asyncIoTable[index].clientData = clientData;
 647     asyncIoTable[index].fd = fd;
 648     asyncIoTable[index].len = len;
 649     asyncIoTable[index].offset = offset;
 650     asyncIoTable[index].buf = buf;
 651     asyncIoTable[index].inUse = 1;
 652     FD_SET(fd, &writeFdSet);
 653     return 0;
 654 }
 655 \f
 656 /*
 657  *--------------------------------------------------------------
 658  *
 659  * OS_Close --
 660  *
 661  *      Closes the descriptor.  This is a pass through to the
 662  *      Unix close.
 663  *
 664  * Results:
 665  *      0 for success, -1 on failure
 666  *
 667  * Side effects:
 668  *      None.
 669  *
 670  *--------------------------------------------------------------
 671  */
 672 int OS_Close(int fd)
 673 {
 674     if (fd == -1)
 675         return 0;
 676
 677     if (asyncIoInUse) {
 678         int index = AIO_RD_IX(fd);
 679
 680         FD_CLR(fd, &readFdSet);
 681         FD_CLR(fd, &readFdSetPost);
 682         if (asyncIoTable[index].inUse != 0) {
 683             asyncIoTable[index].inUse = 0;
 684         }
 685
 686         FD_CLR(fd, &writeFdSet);
 687         FD_CLR(fd, &writeFdSetPost);
 688         index = AIO_WR_IX(fd);
 689         if (asyncIoTable[index].inUse != 0) {
 690             asyncIoTable[index].inUse = 0;
 691         }
 692
 693         if (maxFd == fd) {
 694             maxFd--;
 695         }
 696     }
 697     return close(fd);
 698 }
 699 \f
 700 /*
 701  *--------------------------------------------------------------
 702  *
 703  * OS_CloseRead --
 704  *
 705  *      Cancel outstanding asynchronous reads and prevent subsequent
 706  *      reads from completing.
 707  *
 708  * Results:
 709  *      Socket or file is shutdown. Return values mimic Unix shutdown:
 710  *              0 success, -1 failure
 711  *
 712  *--------------------------------------------------------------
 713  */
 714 int OS_CloseRead(int fd)
 715 {
 716     if(asyncIoTable[AIO_RD_IX(fd)].inUse != 0) {
 717         asyncIoTable[AIO_RD_IX(fd)].inUse = 0;
 718         FD_CLR(fd, &readFdSet);
 719     }
 720
 721     return shutdown(fd, 0);
 722 }
 723
 724 \f
 725 /*
 726  *--------------------------------------------------------------
 727  *
 728  * OS_DoIo --
 729  *
 730  *      This function was formerly OS_Select.  It's purpose is
 731  *      to pull I/O completion events off the queue and dispatch
 732  *      them to the appropriate place.
 733  *
 734  * Results:
 735  *      Returns 0.
 736  *
 737  * Side effects:
 738  *      Handlers are called.
 739  *
 740  *--------------------------------------------------------------
 741  */
 742 int OS_DoIo(struct timeval *tmo)
 743 {
 744     int fd, len, selectStatus;
 745     OS_AsyncProc procPtr;
 746     ClientData clientData;
 747     AioInfo *aioPtr;
 748     fd_set readFdSetCpy;
 749     fd_set writeFdSetCpy;
 750
 751     asyncIoInUse = TRUE;
 752     FD_ZERO(&readFdSetCpy);
 753     FD_ZERO(&writeFdSetCpy);
 754
 755     for(fd = 0; fd <= maxFd; fd++) {
 756         if(FD_ISSET(fd, &readFdSet)) {
 757             FD_SET(fd, &readFdSetCpy);
 758         }
 759         if(FD_ISSET(fd, &writeFdSet)) {
 760             FD_SET(fd, &writeFdSetCpy);
 761         }
 762     }
 763
 764     /*
 765      * If there were no completed events from a prior call, see if there's
 766      * any work to do.
 767      */
 768     if(numRdPosted == 0 && numWrPosted == 0) {
 769         selectStatus = select((maxFd+1), &readFdSetCpy, &writeFdSetCpy,
 770                               NULL, tmo);
 771         if(selectStatus < 0) {
 772             exit(errno);
 773         }
 774
 775         for(fd = 0; fd <= maxFd; fd++) {
 776             /*
 777              * Build up a list of completed events.  We'll work off of
 778              * this list as opposed to looping through the read and write
 779              * fd sets since they can be affected by a callbacl routine.
 780              */
 781             if(FD_ISSET(fd, &readFdSetCpy)) {
 782                 numRdPosted++;
 783                 FD_SET(fd, &readFdSetPost);
 784                 FD_CLR(fd, &readFdSet);
 785             }
 786
 787             if(FD_ISSET(fd, &writeFdSetCpy)) {
 788                 numWrPosted++;
 789                 FD_SET(fd, &writeFdSetPost);
 790                 FD_CLR(fd, &writeFdSet);
 791             }
 792         }
 793     }
 794
 795     if(numRdPosted == 0 && numWrPosted == 0)
 796         return 0;
 797
 798     for(fd = 0; fd <= maxFd; fd++) {
 799         /*
 800          * Do reads and dispatch callback.
 801          */
 802         if(FD_ISSET(fd, &readFdSetPost)
 803            && asyncIoTable[AIO_RD_IX(fd)].inUse) {
 804
 805             numRdPosted--;
 806             FD_CLR(fd, &readFdSetPost);
 807             aioPtr = &asyncIoTable[AIO_RD_IX(fd)];
 808
 809             len = read(aioPtr->fd, aioPtr->buf, aioPtr->len);
 810
 811             procPtr = aioPtr->procPtr;
 812             aioPtr->procPtr = NULL;
 813             clientData = aioPtr->clientData;
 814             aioPtr->inUse = 0;
 815
 816             (*procPtr)(clientData, len);
 817         }
 818
 819         /*
 820          * Do writes and dispatch callback.
 821          */
 822         if(FD_ISSET(fd, &writeFdSetPost) &&
 823            asyncIoTable[AIO_WR_IX(fd)].inUse) {
 824
 825             numWrPosted--;
 826             FD_CLR(fd, &writeFdSetPost);
 827             aioPtr = &asyncIoTable[AIO_WR_IX(fd)];
 828
 829             len = write(aioPtr->fd, aioPtr->buf, aioPtr->len);
 830
 831             procPtr = aioPtr->procPtr;
 832             aioPtr->procPtr = NULL;
 833             clientData = aioPtr->clientData;
 834             aioPtr->inUse = 0;
 835             (*procPtr)(clientData, len);
 836         }
 837     }
 838     return 0;
 839 }
 840
 841 /*
 842  * Not all systems have strdup().
 843  * @@@ autoconf should determine whether or not this is needed, but for now..
 844  */
 845 char * str_dup(const char * str)
 846 {
 847     char * sdup = (char *) malloc(strlen(str) + 1);
 848
 849     if (sdup)
 850         strcpy(sdup, str);
 851
 852     return sdup;
 853 }
 854
 855 \f
 856 /*
 857  *----------------------------------------------------------------------
 858  *
 859  * ClientAddrOK --
 860  *
 861  *      Checks if a client address is in a list of allowed addresses
 862  *
 863  * Results:
 864  *      TRUE if address list is empty or client address is present
 865  *      in the list, FALSE otherwise.
 866  *
 867  *----------------------------------------------------------------------
 868  */
 869 static int ClientAddrOK(struct sockaddr_in *saPtr, const char *clientList)
 870 {
 871     int result = FALSE;
 872     char *clientListCopy, *cur, *next;
 873
 874     if (clientList == NULL || *clientList == '\0') {
 875         return TRUE;
 876     }
 877
 878     clientListCopy = str_dup(clientList);
 879
 880     for (cur = clientListCopy; cur != NULL; cur = next) {
 881         next = strchr(cur, ',');
 882         if (next != NULL) {
 883             *next++ = '\0';
 884         }
 885         if (inet_addr(cur) == saPtr->sin_addr.s_addr) {
 886             result = TRUE;
 887             break;
 888         }
 889     }
 890
 891     free(clientListCopy);
 892     return result;
 893 }
 894
 895 \f
 896 /*
 897  *----------------------------------------------------------------------
 898  *
 899  * AcquireLock --
 900  *
 901  *      On platforms that implement concurrent calls to accept
 902  *      on a shared listening ipcFd, returns 0.  On other platforms,
 903  *      acquires an exclusive lock across all processes sharing a
 904  *      listening ipcFd, blocking until the lock has been acquired.
 905  *
 906  * Results:
 907  *      0 for successful call, -1 in case of system error (fatal).
 908  *
 909  * Side effects:
 910  *      This process now has the exclusive lock.
 911  *
 912  *----------------------------------------------------------------------
 913  */
 914 static int AcquireLock(int sock, int fail_on_intr)
 915 {
 916 #ifdef USE_LOCKING
 917     do {
 918         struct flock lock;
 919         lock.l_type = F_WRLCK;
 920         lock.l_start = 0;
 921         lock.l_whence = SEEK_SET;
 922         lock.l_len = 0;
 923
 924         if (fcntl(sock, F_SETLKW, &lock) != -1)
 925             return 0;
 926     } while (errno == EINTR && !fail_on_intr);
 927
 928     return -1;
 929
 930 #else
 931     return 0;
 932 #endif
 933 }
 934 \f
 935 /*
 936  *----------------------------------------------------------------------
 937  *
 938  * ReleaseLock --
 939  *
 940  *      On platforms that implement concurrent calls to accept
 941  *      on a shared listening ipcFd, does nothing.  On other platforms,
 942  *      releases an exclusive lock acquired by AcquireLock.
 943  *
 944  * Results:
 945  *      0 for successful call, -1 in case of system error (fatal).
 946  *
 947  * Side effects:
 948  *      This process no longer holds the lock.
 949  *
 950  *----------------------------------------------------------------------
 951  */
 952 static int ReleaseLock(int sock)
 953 {
 954 #ifdef USE_LOCKING
 955     do {
 956         struct flock lock;
 957         lock.l_type = F_UNLCK;
 958         lock.l_start = 0;
 959         lock.l_whence = SEEK_SET;
 960         lock.l_len = 0;
 961
 962         if (fcntl(sock, F_SETLK, &lock) != -1)
 963             return 0;
 964     } while (errno == EINTR);
 965
 966     return -1;
 967
 968 #else
 969     return 0;
 970 #endif
 971 }
 972
 973 \f
 974 /**********************************************************************
 975  * Determine if the errno resulting from a failed accept() warrants a
 976  * retry or exit().  Based on Apache's http_main.c accept() handling
 977  * and Stevens' Unix Network Programming Vol 1, 2nd Ed, para. 15.6.
 978  */
 979 static int is_reasonable_accept_errno (const int error)
 980 {
 981     switch (error) {
 982 #ifdef EPROTO
 983         /* EPROTO on certain older kernels really means ECONNABORTED, so
 984          * we need to ignore it for them.  See discussion in new-httpd
 985          * archives nh.9701 search for EPROTO.  Also see nh.9603, search
 986          * for EPROTO:  There is potentially a bug in Solaris 2.x x<6, and
 987          * other boxes that implement tcp sockets in userland (i.e. on top of
 988          * STREAMS).  On these systems, EPROTO can actually result in a fatal
 989          * loop.  See PR#981 for example.  It's hard to handle both uses of
 990          * EPROTO. */
 991         case EPROTO:
 992 #endif
 993 #ifdef ECONNABORTED
 994         case ECONNABORTED:
 995 #endif
 996         /* Linux generates the rest of these, other tcp stacks (i.e.
 997          * bsd) tend to hide them behind getsockopt() interfaces.  They
 998          * occur when the net goes sour or the client disconnects after the
 999          * three-way handshake has been done in the kernel but before
1000          * userland has picked up the socket. */
1001 #ifdef ECONNRESET
1002         case ECONNRESET:
1003 #endif
1004 #ifdef ETIMEDOUT
1005         case ETIMEDOUT:
1006 #endif
1007 #ifdef EHOSTUNREACH
1008         case EHOSTUNREACH:
1009 #endif
1010 #ifdef ENETUNREACH
1011         case ENETUNREACH:
1012 #endif
1013             return 1;
1014
1015         default:
1016             return 0;
1017     }
1018 }
1019
1020 /**********************************************************************
1021  * This works around a problem on Linux 2.0.x and SCO Unixware (maybe
1022  * others?).  When a connect() is made to a Unix Domain socket, but its
1023  * not accept()ed before the web server gets impatient and close()s, an
1024  * accept() results in a valid file descriptor, but no data to read.
1025  * This causes a block on the first read() - which never returns!
1026  *
1027  * Another approach to this is to write() to the socket to provoke a
1028  * SIGPIPE, but this is a pain because of the FastCGI protocol, the fact
1029  * that whatever is written has to be universally ignored by all FastCGI
1030  * web servers, and a SIGPIPE handler has to be installed which returns
1031  * (or SIGPIPE is ignored).
1032  *
1033  * READABLE_UNIX_FD_DROP_DEAD_TIMEVAL = 2,0 by default.
1034  *
1035  * Making it shorter is probably safe, but I'll leave that to you.  Making
1036  * it 0,0 doesn't work reliably.  The shorter you can reliably make it,
1037  * the faster your application will be able to recover (waiting 2 seconds
1038  * may _cause_ the problem when there is a very high demand). At any rate,
1039  * this is better than perma-blocking.
1040  */
1041 static int is_af_unix_keeper(const int fd)
1042 {
1043     struct timeval tval = { READABLE_UNIX_FD_DROP_DEAD_TIMEVAL };
1044     fd_set read_fds;
1045
1046     FD_ZERO(&read_fds);
1047     FD_SET(fd, &read_fds);
1048
1049     return select(fd + 1, &read_fds, NULL, NULL, &tval) >= 0 && FD_ISSET(fd, &read_fds);
1050 }
1051
/*
 *----------------------------------------------------------------------
 *
 * OS_Accept --
 *
 *      Accepts a new FastCGI connection on listen_sock.  The accept()
 *      call is bracketed by AcquireLock()/ReleaseLock() on the listen
 *      socket.  Connections whose peer address family is AF_INET are
 *      checked with ClientAddrOK() against webServerAddrs and closed if
 *      not approved; connections whose family is AF_UNIX are passed
 *      through is_af_unix_keeper() and closed if it rejects them.  In
 *      both cases the routine goes back to waiting for another
 *      connection.
 *
 * Parameters:
 *      listen_sock    - descriptor of the listening socket.
 *      fail_on_intr   - passed to AcquireLock(); when non-zero, an
 *                       accept() interrupted by a signal (EINTR) is not
 *                       retried.
 *      webServerAddrs - passed to ClientAddrOK() for AF_INET peers.
 *
 * Results:
 *      -1 if the operation fails, otherwise this is a valid IPC fd.
 *
 * Side effects:
 *      New IPC connection is accepted.
 *
 *----------------------------------------------------------------------
 */
int OS_Accept(int listen_sock, int fail_on_intr, const char *webServerAddrs)
{
    int conn_fd;
    union {
        struct sockaddr_un un;
        struct sockaddr_in in;
    } sa;

    for (;;) {
        if (AcquireLock(listen_sock, fail_on_intr))
            return -1;

        for (;;) {
            do {
#ifdef HAVE_SOCKLEN
                socklen_t len = sizeof(sa);
#else
                int len = sizeof(sa);
#endif
                conn_fd = accept(listen_sock, (struct sockaddr *)&sa, &len);
            } while (conn_fd < 0 && errno == EINTR && !fail_on_intr);

            if (conn_fd < 0) {
                if (!is_reasonable_accept_errno(errno)) {
                    /* Keep accept()'s errno visible to the caller. */
                    int errnoSave = errno;
                    ReleaseLock(listen_sock);
                    errno = errnoSave;
                    return (-1);
                }
                /* Tolerable failure: clear errno and accept() again. */
                errno = 0;
            }
            else {  /* conn_fd >= 0 */
                if (sa.in.sin_family != AF_INET)
                    break;

#ifdef TCP_NODELAY
                {
                    int set = 1;

                    /* No replies to outgoing data, so disable Nagle */
                    setsockopt(conn_fd, IPPROTO_TCP, TCP_NODELAY, (char *)&set, sizeof(set));
                }
#endif

                /* Check that the client IP address is approved */
                if (ClientAddrOK(&sa.in, webServerAddrs))
                    break;

                close(conn_fd);
            }  /* conn_fd >= 0 */
        }  /* for(;;) - accept */

        if (ReleaseLock(listen_sock)) {
            /*
             * The connection was already accepted; close it so the
             * descriptor is not leaked on this error path, and keep
             * whatever errno ReleaseLock() left behind.
             */
            int errnoSave = errno;
            close(conn_fd);
            errno = errnoSave;
            return (-1);
        }

        if (sa.in.sin_family != AF_UNIX || is_af_unix_keeper(conn_fd))
            break;

        /* Dead Unix domain connection: drop it and start over. */
        close(conn_fd);
    }  /* for(;;) - lock */

    return (conn_fd);
}
1129 \f
/*
 *----------------------------------------------------------------------
 *
 * OS_IpcClose
 *
 *      OS IPC routine to close an IPC connection.  Simply forwards
 *      the descriptor to OS_Close().
 *
 * Results:
 *      Whatever OS_Close() returns for ipcFd.
 *
 * Side effects:
 *      IPC connection is closed.
 *
 *----------------------------------------------------------------------
 */
int OS_IpcClose(int ipcFd)
{
    int status = OS_Close(ipcFd);

    return status;
}
1149
1150 \f
1151 /*
1152  *----------------------------------------------------------------------
1153  *
1154  * OS_IsFcgi --
1155  *
1156  *      Determines whether this process is a FastCGI process or not.
1157  *
1158  * Results:
1159  *      Returns 1 if FastCGI, 0 if not.
1160  *
1161  * Side effects:
1162  *      None.
1163  *
1164  *----------------------------------------------------------------------
1165  */
1166 int OS_IsFcgi(int sock)
1167 {
1168         union {
1169         struct sockaddr_in in;
1170         struct sockaddr_un un;
1171     } sa;
1172 #ifdef HAVE_SOCKLEN
1173     socklen_t len = sizeof(sa);
1174 #else
1175     int len = sizeof(sa);
1176 #endif
1177
1178     errno = 0;
1179     if (getpeername(sock, (struct sockaddr *)&sa, &len) != 0 && errno == ENOTCONN) {
1180         return TRUE;
1181     }
1182     else {
1183         return FALSE;
1184     }
1185 }
1186 \f
/*
 *----------------------------------------------------------------------
 *
 * OS_SetFlags --
 *
 *      Sets selected flag bits in an open file descriptor: reads the
 *      current file status flags with F_GETFL, ORs in `flags', and
 *      writes the result back with F_SETFL.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      If either fcntl() call fails, the process exits with errno as
 *      its exit status.
 *
 *----------------------------------------------------------------------
 */
void OS_SetFlags(int fd, int flags)
{
    int current = fcntl(fd, F_GETFL, 0);

    if (current < 0) {
        exit(errno);
    }

    /* Existing bits are preserved; only the requested ones are added. */
    if (fcntl(fd, F_SETFL, current | flags) < 0) {
        exit(errno);
    }
}