libfcgi/os_unix.c

   1 /*
   2  * os_unix.c --
   3  *
   4  *      Description of file.
   5  *
   6  *
   7  *  Copyright (c) 1995 Open Market, Inc.
   8  *  All rights reserved.
   9  *
  10  *  This file contains proprietary and confidential information and
  11  *  remains the unpublished property of Open Market, Inc. Use,
  12  *  disclosure, or reproduction is prohibited except as permitted by
  13  *  express written license agreement with Open Market, Inc.
  14  *
  15  *  Bill Snapper
  16  *  snapper@openmarket.com
  17  */
  18
  19 #ifndef lint
  20 static const char rcsid[] = "$Id: os_unix.c,v 1.8 1999/02/06 05:08:33 roberts Exp $";
  21 #endif /* not lint */
  22
  23 #include "fcgimisc.h"
  24 #include "fcgiapp.h"
  25 #include "fcgiappmisc.h"
  26 #include "fastcgi.h"
  27
  28 #include <stdio.h>
  29 #ifdef HAVE_UNISTD_H
  30 #include <unistd.h>
  31 #endif
  32 #include <assert.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 #include <memory.h>     /* for memchr() */
  36 #include <errno.h>
  37 #include <stdarg.h>
  38 #include <math.h>
  39 #ifdef HAVE_SYS_SOCKET_H
  40 #include <sys/socket.h> /* for getpeername */
  41 #endif
  42 #include <sys/un.h>
  43 #include <fcntl.h>      /* for fcntl */
  44 #ifdef HAVE_NETDB_H
  45 #include <netdb.h>
  46 #endif
  47 #include <sys/time.h>
  48
  49 #include <sys/types.h>
  50 #ifdef HAVE_NETINET_IN_H
  51 #include <netinet/in.h>
  52 #endif
  53 #include <arpa/inet.h>
  54 #include <netinet/tcp.h>
  55
  56 #include "fcgios.h"
  57
/*
 * ClientData is the opaque value a caller registers with an async I/O
 * request and receives back in its completion callback.  The
 * _CLIENTDATA guard skips the typedef when that macro is already
 * defined (presumably by another header that supplies the same type).
 */
#ifndef _CLIENTDATA
#   if defined(__STDC__) || defined(__cplusplus)
    typedef void *ClientData;
#   else
    /* fallback used when the compiler does not announce __STDC__ */
    typedef int *ClientData;
#   endif /* __STDC__ */
#define _CLIENTDATA
#endif
  66
  67 /*
  68  * This structure holds an entry for each outstanding async I/O operation.
  69  */
typedef struct {
    OS_AsyncProc procPtr;           /* callout completion procedure */
    ClientData clientData;          /* caller private data */
    int fd;                         /* descriptor the request was queued on */
    int len;                        /* byte count handed to read()/write() */
    int offset;                     /* offset supplied by the caller; stored as given */
    void *buf;                      /* buffer supplied by the caller */
    int inUse;                      /* non-zero while this slot holds a queued request */
} AioInfo;
  79
  80 /*
  81  * Entries in the async I/O table are allocated 2 per file descriptor.
  82  *
  83  * Read Entry Index  = fd * 2
  84  * Write Entry Index = (fd * 2) + 1
  85  */
  86 #define AIO_RD_IX(fd) (fd * 2)
  87 #define AIO_WR_IX(fd) ((fd * 2) + 1)
  88
/* Number of AioInfo slots currently allocated in asyncIoTable. */
static int asyncIoTableSize = 16;
/* Async I/O slot table; NULL until OS_LibInit() allocates it. */
static AioInfo *asyncIoTable = NULL;
  91 #define STDIN_FILENO  0
  92 #define STDOUT_FILENO 1
  93 #define STDERR_FILENO 2
  94
#ifndef FALSE
#define FALSE 0
#endif

#ifndef TRUE
#define TRUE 1
#endif

static int isFastCGI = FALSE;
/* Set by OS_LibInit(), cleared by OS_LibShutdown(). */
static int libInitialized = FALSE;

/* Descriptors that currently have an async read / write request queued. */
static fd_set readFdSet;
static fd_set writeFdSet;

/*
 * Descriptors that select() reported ready in OS_DoIo() and whose
 * callbacks have not been dispatched yet, with a count for each set.
 */
static fd_set readFdSetPost;
static int numRdPosted = 0;
static fd_set writeFdSetPost;
static int numWrPosted = 0;
/* Highest descriptor registered for async I/O so far; -1 when none. */
static int volatile maxFd = -1;
 114
 115 \f
 116 /*
 117  *--------------------------------------------------------------
 118  *
 119  * OS_LibInit --
 120  *
 121  *      Set up the OS library for use.
 122  *
 123  *      NOTE: This function is really only needed for application
 124  *            asynchronous I/O.  It will most likely change in the
 125  *            future to setup the multi-threaded environment.
 126  *
 127  * Results:
 128  *      Returns 0 if success, -1 if not.
 129  *
 130  * Side effects:
 131  *      Async I/O table allocated and initialized.
 132  *
 133  *--------------------------------------------------------------
 134  */
 135 int OS_LibInit(int stdioFds[3])
 136 {
 137     if(libInitialized)
 138         return 0;
 139
 140     asyncIoTable = malloc(asyncIoTableSize * sizeof(AioInfo));
 141     if(asyncIoTable == NULL) {
 142         errno = ENOMEM;
 143         return -1;
 144     }
 145     memset((char *) asyncIoTable, 0,
 146            asyncIoTableSize * sizeof(AioInfo));
 147
 148     FD_ZERO(&readFdSet);
 149     FD_ZERO(&writeFdSet);
 150     FD_ZERO(&readFdSetPost);
 151     FD_ZERO(&writeFdSetPost);
 152     libInitialized = TRUE;
 153     return 0;
 154 }
 155
 156 \f
 157 /*
 158  *--------------------------------------------------------------
 159  *
 160  * OS_LibShutdown --
 161  *
 162  *      Shutdown the OS library.
 163  *
 164  * Results:
 165  *      None.
 166  *
 167  * Side effects:
 168  *      Memory freed, fds closed.
 169  *
 170  *--------------------------------------------------------------
 171  */
 172 void OS_LibShutdown()
 173 {
 174     if(!libInitialized)
 175         return;
 176
 177     free(asyncIoTable);
 178     asyncIoTable = NULL;
 179     libInitialized = FALSE;
 180     return;
 181 }
 182
 183 \f
 184 /*
 185  *----------------------------------------------------------------------
 186  *
 187  * OS_BuildSockAddrUn --
 188  *
 189  *      Using the pathname bindPath, fill in the sockaddr_un structure
 190  *      *servAddrPtr and the length of this structure *servAddrLen.
 191  *
 192  *      The format of the sockaddr_un structure changed incompatibly in
 193  *      4.3BSD Reno.  Digital UNIX supports both formats, other systems
 194  *      support one or the other.
 195  *
 196  * Results:
 197  *      0 for normal return, -1 for failure (bindPath too long).
 198  *
 199  *----------------------------------------------------------------------
 200  */
 201
 202 static int OS_BuildSockAddrUn(char *bindPath,
 203                               struct sockaddr_un *servAddrPtr,
 204                               int *servAddrLen)
 205 {
 206     int bindPathLen = strlen(bindPath);
 207
 208 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 209     if(bindPathLen >= sizeof(servAddrPtr->sun_path)) {
 210         return -1;
 211     }
 212 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 213     if(bindPathLen > sizeof(servAddrPtr->sun_path)) {
 214         return -1;
 215     }
 216 #endif
 217     memset((char *) servAddrPtr, 0, sizeof(*servAddrPtr));
 218     servAddrPtr->sun_family = AF_UNIX;
 219     memcpy(servAddrPtr->sun_path, bindPath, bindPathLen);
 220 #ifdef HAVE_SOCKADDR_UN_SUN_LEN /* 4.3BSD Reno and later: BSDI, DEC */
 221     *servAddrLen = sizeof(servAddrPtr->sun_len)
 222             + sizeof(servAddrPtr->sun_family)
 223             + bindPathLen + 1;
 224     servAddrPtr->sun_len = *servAddrLen;
 225 #else                           /* 4.3 BSD Tahoe: Solaris, HPUX, DEC, ... */
 226     *servAddrLen = sizeof(servAddrPtr->sun_family) + bindPathLen;
 227 #endif
 228     return 0;
 229 }
 230 \f
/*
 * One variable that can hold either address form used in this file, so
 * the same storage can be filled in as AF_UNIX or AF_INET and then
 * passed to bind()/connect().
 */
union SockAddrUnion {
    struct  sockaddr_un unixVariant;    /* Unix-domain socket address */
    struct  sockaddr_in inetVariant;    /* TCP/IP (AF_INET) address */
};
 235
 236 \f
 237 /*
 238  * OS_CreateLocalIpcFd --
 239  *
 240  *   This procedure is responsible for creating the listener socket
 241  *   on Unix for local process communication.  It will create a
 242  *   domain socket or a TCP/IP socket bound to "localhost" and return
 243  *   a file descriptor to it to the caller.
 244  *
 245  * Results:
 246  *      Listener socket created.  This call returns either a valid
 247  *      file descriptor or -1 on error.
 248  *
 249  * Side effects:
 250  *      None.
 251  *
 252  *----------------------------------------------------------------------
 253  */
 254 int OS_CreateLocalIpcFd(char *bindPath)
 255 {
 256     int listenSock, servLen;
 257     union   SockAddrUnion sa;
 258     int     tcp = FALSE;
 259     char    *tp;
 260     short   port;
 261     char    host[MAXPATHLEN];
 262
 263     strcpy(host, bindPath);
 264     if((tp = strchr(host, ':')) != 0) {
 265         *tp++ = 0;
 266         if((port = atoi(tp)) == 0) {
 267             *--tp = ':';
 268          } else {
 269             tcp = TRUE;
 270          }
 271     }
 272     if(tcp && (*host && strcmp(host, "localhost") != 0)) {
 273         fprintf(stderr, "To start a service on a TCP port can not "
 274                         "specify a host name.\n"
 275                         "You should either use \"localhost:<port>\" or "
 276                         " just use \":<port>.\"\n");
 277         exit(1);
 278     }
 279
 280     if(tcp) {
 281         listenSock = socket(AF_INET, SOCK_STREAM, 0);
 282         if(listenSock >= 0) {
 283             int flag = 1;
 284             if(setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
 285                           (char *) &flag, sizeof(flag)) < 0) {
 286                 fprintf(stderr, "Can't set SO_REUSEADDR.\n");
 287                 exit(1001);
 288             }
 289         }
 290     } else {
 291         listenSock = socket(AF_UNIX, SOCK_STREAM, 0);
 292     }
 293     if(listenSock < 0) {
 294         return -1;
 295     }
 296
 297     /*
 298      * Bind the listening socket.
 299      */
 300     if(tcp) {
 301         memset((char *) &sa.inetVariant, 0, sizeof(sa.inetVariant));
 302         sa.inetVariant.sin_family = AF_INET;
 303         sa.inetVariant.sin_addr.s_addr = htonl(INADDR_ANY);
 304         sa.inetVariant.sin_port = htons(port);
 305         servLen = sizeof(sa.inetVariant);
 306     } else {
 307         unlink(bindPath);
 308         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 309             fprintf(stderr, "Listening socket's path name is too long.\n");
 310             exit(1000);
 311         }
 312     }
 313     if(bind(listenSock, (struct sockaddr *) &sa.unixVariant, servLen) < 0
 314        || listen(listenSock, 5) < 0) {
 315         perror("bind/listen");
 316         exit(errno);
 317     }
 318
 319     return listenSock;
 320 }
 321
 322 \f
 323 /*
 324  *----------------------------------------------------------------------
 325  *
 326  * OS_FcgiConnect --
 327  *
 328  *      Create the socket and connect to the remote application if
 329  *      possible.
 330  *
 331  *      This was lifted from the cgi-fcgi application and was abstracted
 332  *      out because Windows NT does not have a domain socket and must
 333  *      use a named pipe which has a different API altogether.
 334  *
 335  * Results:
 336  *      -1 if fail or a valid file descriptor if connection succeeds.
 337  *
 338  * Side effects:
 339  *      Remote connection established.
 340  *
 341  *----------------------------------------------------------------------
 342  */
 343 int OS_FcgiConnect(char *bindPath)
 344 {
 345     union   SockAddrUnion sa;
 346     int servLen, resultSock;
 347     int connectStatus;
 348     char    *tp;
 349     char    host[MAXPATHLEN];
 350     short   port;
 351     int     tcp = FALSE;
 352
 353     strcpy(host, bindPath);
 354     if((tp = strchr(host, ':')) != 0) {
 355         *tp++ = 0;
 356         if((port = atoi(tp)) == 0) {
 357             *--tp = ':';
 358          } else {
 359             tcp = TRUE;
 360          }
 361     }
 362     if(tcp == TRUE) {
 363         struct  hostent *hp;
 364         if((hp = gethostbyname((*host ? host : "localhost"))) == NULL) {
 365             fprintf(stderr, "Unknown host: %s\n", bindPath);
 366             exit(1000);
 367         }
 368         sa.inetVariant.sin_family = AF_INET;
 369         memcpy(&sa.inetVariant.sin_addr, hp->h_addr, hp->h_length);
 370         sa.inetVariant.sin_port = htons(port);
 371         servLen = sizeof(sa.inetVariant);
 372         resultSock = socket(AF_INET, SOCK_STREAM, 0);
 373     } else {
 374         if(OS_BuildSockAddrUn(bindPath, &sa.unixVariant, &servLen)) {
 375             fprintf(stderr, "Listening socket's path name is too long.\n");
 376             exit(1000);
 377         }
 378         resultSock = socket(AF_UNIX, SOCK_STREAM, 0);
 379     }
 380
 381     assert(resultSock >= 0);
 382     connectStatus = connect(resultSock, (struct sockaddr *) &sa.unixVariant,
 383                              servLen);
 384     if(connectStatus >= 0) {
 385         return resultSock;
 386     } else {
 387         /*
 388          * Most likely (errno == ENOENT || errno == ECONNREFUSED)
 389          * and no FCGI application server is running.
 390          */
 391         close(resultSock);
 392         return -1;
 393     }
 394 }
 395
 396 \f
 397 /*
 398  *--------------------------------------------------------------
 399  *
 400  * OS_Read --
 401  *
 402  *      Pass through to the unix read function.
 403  *
 404  * Results:
 405  *      Returns number of bytes read, 0, or -1 on failure: errno
 406  *      contains actual error.
 407  *
 408  * Side effects:
 409  *      None.
 410  *
 411  *--------------------------------------------------------------
 412  */
 413 int OS_Read(int fd, char * buf, size_t len)
 414 {
 415     return(read(fd, buf, len));
 416 }
 417 \f
 418 /*
 419  *--------------------------------------------------------------
 420  *
 421  * OS_Write --
 422  *
 423  *      Pass through to unix write function.
 424  *
 425  * Results:
 426  *      Returns number of bytes written, 0, or -1 on failure: errno
 427  *      contains actual error.
 428  *
 429  * Side effects:
 430  *      none.
 431  *
 432  *--------------------------------------------------------------
 433  */
 434 int OS_Write(int fd, char * buf, size_t len)
 435 {
 436     return(write(fd, buf, len));
 437 }
 438
 439 \f
 440 /*
 441  *----------------------------------------------------------------------
 442  *
 443  * OS_SpawnChild --
 444  *
 445  *      Spawns a new FastCGI listener process.
 446  *
 447  * Results:
 448  *      0 if success, -1 if error.
 449  *
 450  * Side effects:
 451  *      Child process spawned.
 452  *
 453  *----------------------------------------------------------------------
 454  */
 455 int OS_SpawnChild(char *appPath, int listenFd)
 456 {
 457     int forkResult;
 458
 459     forkResult = fork();
 460     if(forkResult < 0) {
 461         exit(errno);
 462     }
 463
 464     if(forkResult == 0) {
 465         /*
 466          * Close STDIN unconditionally.  It's used by the parent
 467          * process for CGI communication.  The FastCGI applciation
 468          * will be replacing this with the FastCGI listenFd IF
 469          * STDIN_FILENO is the same as FCGI_LISTENSOCK_FILENO
 470          * (which it is on Unix).  Regardless, STDIN, STDOUT, and
 471          * STDERR will be closed as the FastCGI process uses a
 472          * multiplexed socket in their place.
 473          */
 474         close(STDIN_FILENO);
 475
 476         /*
 477          * If the listenFd is already the value of FCGI_LISTENSOCK_FILENO
 478          * we're set.  If not, change it so the child knows where to
 479          * get the listen socket from.
 480          */
 481         if(listenFd != FCGI_LISTENSOCK_FILENO) {
 482             dup2(listenFd, FCGI_LISTENSOCK_FILENO);
 483             close(listenFd);
 484         }
 485
 486         close(STDOUT_FILENO);
 487         close(STDERR_FILENO);
 488
 489         /*
 490          * We're a child.  Exec the application.
 491          *
 492          * XXX: entire environment passes through
 493          */
 494         execl(appPath, appPath, NULL);
 495         /*
 496          * XXX: Can't do this as we've already closed STDERR!!!
 497          *
 498          * perror("exec");
 499          */
 500         exit(errno);
 501     }
 502     return 0;
 503 }
 504
 505 \f
 506 /*
 507  *--------------------------------------------------------------
 508  *
 509  * OS_AsyncReadStdin --
 510  *
 511  *      This initiates an asynchronous read on the standard
 512  *      input handle.
 513  *
 514  *      The abstraction is necessary because Windows NT does not
 515  *      have a clean way of "select"ing a file descriptor for
 516  *      I/O.
 517  *
 518  * Results:
 519  *      -1 if error, 0 otherwise.
 520  *
 521  * Side effects:
 522  *      Asynchronous bit is set in the readfd variable and
 523  *      request is enqueued.
 524  *
 525  *--------------------------------------------------------------
 526  */
 527 int OS_AsyncReadStdin(void *buf, int len, OS_AsyncProc procPtr,
 528                       ClientData clientData)
 529 {
 530     int index = AIO_RD_IX(STDIN_FILENO);
 531
 532     ASSERT(asyncIoTable[index].inUse == 0);
 533     asyncIoTable[index].procPtr = procPtr;
 534     asyncIoTable[index].clientData = clientData;
 535     asyncIoTable[index].fd = STDIN_FILENO;
 536     asyncIoTable[index].len = len;
 537     asyncIoTable[index].offset = 0;
 538     asyncIoTable[index].buf = buf;
 539     asyncIoTable[index].inUse = 1;
 540     FD_SET(STDIN_FILENO, &readFdSet);
 541     if(STDIN_FILENO > maxFd)
 542         maxFd = STDIN_FILENO;
 543     return 0;
 544 }
 545
 546 static void GrowAsyncTable(void)
 547 {
 548     int oldTableSize = asyncIoTableSize;
 549
 550     asyncIoTableSize = asyncIoTableSize * 2;
 551     asyncIoTable = realloc(asyncIoTable, asyncIoTableSize * sizeof(AioInfo));
 552     if(asyncIoTable == NULL) {
 553         errno = ENOMEM;
 554         exit(errno);
 555     }
 556     memset((char *) &asyncIoTable[oldTableSize], 0,
 557            oldTableSize * sizeof(AioInfo));
 558
 559 }
 560
 561 \f
 562 /*
 563  *--------------------------------------------------------------
 564  *
 565  * OS_AsyncRead --
 566  *
 567  *      This initiates an asynchronous read on the file
 568  *      handle which may be a socket or named pipe.
 569  *
 570  *      We also must save the ProcPtr and ClientData, so later
 571  *      when the io completes, we know who to call.
 572  *
 573  *      We don't look at any results here (the ReadFile may
 574  *      return data if it is cached) but do all completion
 575  *      processing in OS_Select when we get the io completion
 576  *      port done notifications.  Then we call the callback.
 577  *
 578  * Results:
 579  *      -1 if error, 0 otherwise.
 580  *
 581  * Side effects:
 582  *      Asynchronous I/O operation is queued for completion.
 583  *
 584  *--------------------------------------------------------------
 585  */
 586 int OS_AsyncRead(int fd, int offset, void *buf, int len,
 587                  OS_AsyncProc procPtr, ClientData clientData)
 588 {
 589     int index = AIO_RD_IX(fd);
 590
 591     ASSERT(asyncIoTable != NULL);
 592
 593     if(fd > maxFd)
 594         maxFd = fd;
 595
 596     if(index >= asyncIoTableSize) {
 597         GrowAsyncTable();
 598     }
 599
 600     ASSERT(asyncIoTable[index].inUse == 0);
 601     asyncIoTable[index].procPtr = procPtr;
 602     asyncIoTable[index].clientData = clientData;
 603     asyncIoTable[index].fd = fd;
 604     asyncIoTable[index].len = len;
 605     asyncIoTable[index].offset = offset;
 606     asyncIoTable[index].buf = buf;
 607     asyncIoTable[index].inUse = 1;
 608     FD_SET(fd, &readFdSet);
 609     return 0;
 610 }
 611 \f
 612 /*
 613  *--------------------------------------------------------------
 614  *
 615  * OS_AsyncWrite --
 616  *
 617  *      This initiates an asynchronous write on the "fake" file
 618  *      descriptor (which may be a file, socket, or named pipe).
 619  *      We also must save the ProcPtr and ClientData, so later
 620  *      when the io completes, we know who to call.
 621  *
 622  *      We don't look at any results here (the WriteFile generally
 623  *      completes immediately) but do all completion processing
 624  *      in OS_DoIo when we get the io completion port done
 625  *      notifications.  Then we call the callback.
 626  *
 627  * Results:
 628  *      -1 if error, 0 otherwise.
 629  *
 630  * Side effects:
 631  *      Asynchronous I/O operation is queued for completion.
 632  *
 633  *--------------------------------------------------------------
 634  */
 635 int OS_AsyncWrite(int fd, int offset, void *buf, int len,
 636                   OS_AsyncProc procPtr, ClientData clientData)
 637 {
 638     int index = AIO_WR_IX(fd);
 639
 640     if(fd > maxFd)
 641         maxFd = fd;
 642
 643     if(index >= asyncIoTableSize) {
 644         GrowAsyncTable();
 645     }
 646
 647     ASSERT(asyncIoTable[index].inUse == 0);
 648     asyncIoTable[index].procPtr = procPtr;
 649     asyncIoTable[index].clientData = clientData;
 650     asyncIoTable[index].fd = fd;
 651     asyncIoTable[index].len = len;
 652     asyncIoTable[index].offset = offset;
 653     asyncIoTable[index].buf = buf;
 654     asyncIoTable[index].inUse = 1;
 655     FD_SET(fd, &writeFdSet);
 656     return 0;
 657 }
 658 \f
 659 /*
 660  *--------------------------------------------------------------
 661  *
 662  * OS_Close --
 663  *
 664  *      Closes the descriptor.  This is a pass through to the
 665  *      Unix close.
 666  *
 667  * Results:
 668  *      0 for success, -1 on failure
 669  *
 670  * Side effects:
 671  *      None.
 672  *
 673  *--------------------------------------------------------------
 674  */
 675 int OS_Close(int fd)
 676 {
 677     int index = AIO_RD_IX(fd);
 678
 679     FD_CLR(fd, &readFdSet);
 680     FD_CLR(fd, &readFdSetPost);
 681     if(asyncIoTable[index].inUse != 0) {
 682         asyncIoTable[index].inUse = 0;
 683     }
 684
 685     FD_CLR(fd, &writeFdSet);
 686     FD_CLR(fd, &writeFdSetPost);
 687     index = AIO_WR_IX(fd);
 688     if(asyncIoTable[index].inUse != 0) {
 689         asyncIoTable[index].inUse = 0;
 690     }
 691     if(maxFd == fd)
 692         maxFd--;
 693     return close(fd);
 694 }
 695 \f
 696 /*
 697  *--------------------------------------------------------------
 698  *
 699  * OS_CloseRead --
 700  *
 701  *      Cancel outstanding asynchronous reads and prevent subsequent
 702  *      reads from completing.
 703  *
 704  * Results:
 705  *      Socket or file is shutdown. Return values mimic Unix shutdown:
 706  *              0 success, -1 failure
 707  *
 708  *--------------------------------------------------------------
 709  */
 710 int OS_CloseRead(int fd)
 711 {
 712     if(asyncIoTable[AIO_RD_IX(fd)].inUse != 0) {
 713         asyncIoTable[AIO_RD_IX(fd)].inUse = 0;
 714         FD_CLR(fd, &readFdSet);
 715     }
 716
 717     return shutdown(fd, 0);
 718 }
 719
 720 \f
 721 /*
 722  *--------------------------------------------------------------
 723  *
 724  * OS_DoIo --
 725  *
 726  *      This function was formerly OS_Select.  Its purpose is
 727  *      to pull I/O completion events off the queue and dispatch
 728  *      them to the appropriate place.
 729  *
 730  * Results:
 731  *      Returns 0.
 732  *
 733  * Side effects:
 734  *      Handlers are called.
 735  *
 736  *--------------------------------------------------------------
 737  */
 738 int OS_DoIo(struct timeval *tmo)
 739 {
 740     int fd, len, selectStatus;
 741     OS_AsyncProc procPtr;
 742     ClientData clientData;
 743     AioInfo *aioPtr;
 744     fd_set readFdSetCpy;
 745     fd_set writeFdSetCpy;
 746
 747     FD_ZERO(&readFdSetCpy);
 748     FD_ZERO(&writeFdSetCpy);
 749
 750     for(fd = 0; fd <= maxFd; fd++) {
 751         if(FD_ISSET(fd, &readFdSet)) {
 752             FD_SET(fd, &readFdSetCpy);
 753         }
 754         if(FD_ISSET(fd, &writeFdSet)) {
 755             FD_SET(fd, &writeFdSetCpy);
 756         }
 757     }
 758
 759     /*
 760      * If there were no completed events from a prior call, see if there's
 761      * any work to do.
 762      */
 763     if(numRdPosted == 0 && numWrPosted == 0) {
 764         selectStatus = select((maxFd+1), &readFdSetCpy, &writeFdSetCpy,
 765                               NULL, tmo);
 766         if(selectStatus < 0) {
 767             exit(errno);
 768         }
 769
 770         for(fd = 0; fd <= maxFd; fd++) {
 771             /*
 772              * Build up a list of completed events.  We'll work off of
 773              * this list as opposed to looping through the read and write
 774              * fd sets since they can be affected by a callbacl routine.
 775              */
 776             if(FD_ISSET(fd, &readFdSetCpy)) {
 777                 numRdPosted++;
 778                 FD_SET(fd, &readFdSetPost);
 779                 FD_CLR(fd, &readFdSet);
 780             }
 781
 782             if(FD_ISSET(fd, &writeFdSetCpy)) {
 783                 numWrPosted++;
 784                 FD_SET(fd, &writeFdSetPost);
 785                 FD_CLR(fd, &writeFdSet);
 786             }
 787         }
 788     }
 789
 790     if(numRdPosted == 0 && numWrPosted == 0)
 791         return 0;
 792
 793     for(fd = 0; fd <= maxFd; fd++) {
 794         /*
 795          * Do reads and dispatch callback.
 796          */
 797         if(FD_ISSET(fd, &readFdSetPost)
 798            && asyncIoTable[AIO_RD_IX(fd)].inUse) {
 799
 800             numRdPosted--;
 801             FD_CLR(fd, &readFdSetPost);
 802             aioPtr = &asyncIoTable[AIO_RD_IX(fd)];
 803
 804             len = read(aioPtr->fd, aioPtr->buf, aioPtr->len);
 805
 806             procPtr = aioPtr->procPtr;
 807             aioPtr->procPtr = NULL;
 808             clientData = aioPtr->clientData;
 809             aioPtr->inUse = 0;
 810
 811             (*procPtr)(clientData, len);
 812         }
 813
 814         /*
 815          * Do writes and dispatch callback.
 816          */
 817         if(FD_ISSET(fd, &writeFdSetPost) &&
 818            asyncIoTable[AIO_WR_IX(fd)].inUse) {
 819
 820             numWrPosted--;
 821             FD_CLR(fd, &writeFdSetPost);
 822             aioPtr = &asyncIoTable[AIO_WR_IX(fd)];
 823
 824             len = write(aioPtr->fd, aioPtr->buf, aioPtr->len);
 825
 826             procPtr = aioPtr->procPtr;
 827             aioPtr->procPtr = NULL;
 828             clientData = aioPtr->clientData;
 829             aioPtr->inUse = 0;
 830             (*procPtr)(clientData, len);
 831         }
 832     }
 833     return 0;
 834 }
 835
 836 \f
 837 /*
 838  *----------------------------------------------------------------------
 839  *
 840  * ClientAddrOK --
 841  *
 842  *      Checks if a client address is in a list of allowed addresses
 843  *
 844  * Results:
 845  *      TRUE if address list is empty or client address is present
 846  *      in the list, FALSE otherwise.
 847  *
 848  *----------------------------------------------------------------------
 849  */
 850 static int ClientAddrOK(struct sockaddr_in *saPtr, char *clientList)
 851 {
 852     int result = FALSE;
 853     char *clientListCopy, *cur, *next;
 854     char *newString = NULL;
 855     int strLen;
 856
 857     if(clientList == NULL || *clientList == '\0') {
 858         return TRUE;
 859     }
 860
 861     strLen = strlen(clientList);
 862     clientListCopy = malloc(strLen + 1);
 863     assert(newString != NULL);
 864     memcpy(newString, clientList, strLen);
 865     newString[strLen] = '\000';
 866
 867     for(cur = clientListCopy; cur != NULL; cur = next) {
 868         next = strchr(cur, ',');
 869         if(next != NULL) {
 870             *next++ = '\0';
 871         }
 872         if(inet_addr(cur) == saPtr->sin_addr.s_addr) {
 873             result = TRUE;
 874             break;
 875         }
 876     }
 877     free(clientListCopy);
 878     return result;
 879 }
 880
 881 \f
 882 /*
 883  *----------------------------------------------------------------------
 884  *
 885  * AcquireLock --
 886  *
 887  *      On platforms that implement concurrent calls to accept
 888  *      on a shared listening ipcFd, returns 0.  On other platforms,
 889  *      acquires an exclusive lock across all processes sharing a
 890  *      listening ipcFd, blocking until the lock has been acquired.
 891  *
 892  * Results:
 893  *      0 for successful call, -1 in case of system error (fatal).
 894  *
 895  * Side effects:
 896  *      This process now has the exclusive lock.
 897  *
 898  *----------------------------------------------------------------------
 899  */
 900 static int AcquireLock(int blocking)
 901 {
 902 #ifdef USE_LOCKING
 903     struct flock lock;
 904     lock.l_type = F_WRLCK;
 905     lock.l_start = 0;
 906     lock.l_whence = SEEK_SET;
 907     lock.l_len = 0;
 908
 909     if(fcntl(FCGI_LISTENSOCK_FILENO,
 910              blocking ? F_SETLKW : F_SETLK, &lock) < 0) {
 911         if (errno != EINTR)
 912             return -1;
 913     }
 914 #endif /* USE_LOCKING */
 915     return 0;
 916 }
 917 \f
 918 /*
 919  *----------------------------------------------------------------------
 920  *
 921  * ReleaseLock --
 922  *
 923  *      On platforms that implement concurrent calls to accept
 924  *      on a shared listening ipcFd, does nothing.  On other platforms,
 925  *      releases an exclusive lock acquired by AcquireLock.
 926  *
 927  * Results:
 928  *      0 for successful call, -1 in case of system error (fatal).
 929  *
 930  * Side effects:
 931  *      This process no longer holds the lock.
 932  *
 933  *----------------------------------------------------------------------
 934  */
 935 static int ReleaseLock(void)
 936 {
 937 #ifdef USE_LOCKING
 938     struct flock lock;
 939     lock.l_type = F_UNLCK;
 940     lock.l_start = 0;
 941     lock.l_whence = SEEK_SET;
 942     lock.l_len = 0;
 943
 944     if(fcntl(FCGI_LISTENSOCK_FILENO, F_SETLK, &lock) < 0) {
 945         return -1;
 946     }
 947 #endif /* USE_LOCKING */
 948     return 0;
 949 }
 950
 951 \f
 952 /**********************************************************************
 953  * Determine if the errno resulting from a failed accept() warrants a
 954  * retry or exit().  Based on Apache's http_main.c accept() handling
 955  * and Stevens' Unix Network Programming Vol 1, 2nd Ed, para. 15.6.
 956  */
 957 static int is_reasonable_accept_errno (const int error)
 958 {
 959     switch (error) {
 960 #ifdef EPROTO
 961         /* EPROTO on certain older kernels really means ECONNABORTED, so
 962          * we need to ignore it for them.  See discussion in new-httpd
 963          * archives nh.9701 search for EPROTO.  Also see nh.9603, search
 964          * for EPROTO:  There is potentially a bug in Solaris 2.x x<6, and
 965          * other boxes that implement tcp sockets in userland (i.e. on top of
 966          * STREAMS).  On these systems, EPROTO can actually result in a fatal
 967          * loop.  See PR#981 for example.  It's hard to handle both uses of
 968          * EPROTO. */
 969         case EPROTO:
 970 #endif
 971 #ifdef ECONNABORTED
 972         case ECONNABORTED:
 973 #endif
 974         /* Linux generates the rest of these, other tcp stacks (i.e.
 975          * bsd) tend to hide them behind getsockopt() interfaces.  They
 976          * occur when the net goes sour or the client disconnects after the
 977          * three-way handshake has been done in the kernel but before
 978          * userland has picked up the socket. */
 979 #ifdef ECONNRESET
 980         case ECONNRESET:
 981 #endif
 982 #ifdef ETIMEDOUT
 983         case ETIMEDOUT:
 984 #endif
 985 #ifdef EHOSTUNREACH
 986         case EHOSTUNREACH:
 987 #endif
 988 #ifdef ENETUNREACH
 989         case ENETUNREACH:
 990 #endif
 991             return 1;
 992
 993         default:
 994             return 0;
 995     }
 996 }
 997
 998 /**********************************************************************
 999  * This works around a problem on Linux 2.0.x and SCO Unixware (maybe
1000  * others?).  When a connect() is made to a Unix Domain socket, but it's
1001  * not accept()ed before the web server gets impatient and close()s, an
1002  * accept() results in a valid file descriptor, but no data to read.
1003  * This causes a block on the first read() - which never returns!
1004  *
1005  * Another approach to this is to write() to the socket to provoke a
1006  * SIGPIPE, but this is a pain because of the FastCGI protocol, the fact
1007  * that whatever is written has to be universally ignored by all FastCGI
1008  * web servers, and a SIGPIPE handler has to be installed which returns
1009  * (or SIGPIPE is ignored).
1010  *
1011  * READABLE_UNIX_FD_DROP_DEAD_TIMEVAL = 2,0 by default.
1012  *
1013  * Making it shorter is probably safe, but I'll leave that to you.  Making
1014  * it 0,0 doesn't work reliably.  The shorter you can reliably make it,
1015  * the faster your application will be able to recover (waiting 2 seconds
1016  * may _cause_ the problem when there is a very high demand). At any rate,
1017  * this is better than perma-blocking.
1018  */
1019 static int is_af_unix_keeper(const int fd)
1020 {
1021     struct timeval tval = { READABLE_UNIX_FD_DROP_DEAD_TIMEVAL };
1022     fd_set read_fds;
1023
1024     FD_ZERO(&read_fds);
1025     FD_SET(fd, &read_fds);
1026
1027     return select(fd + 1, &read_fds, NULL, NULL, &tval) >= 0 && FD_ISSET(fd, &read_fds);
1028 }
1029
1030 /*
1031  *----------------------------------------------------------------------
1032  *
1033  * OS_FcgiIpcAccept --
1034  *
1035  *      Accepts a new FastCGI connection.  This routine knows whether
1036  *      we're dealing with TCP based sockets or NT Named Pipes for IPC.
1037  *
1038  * Results:
1039  *      -1 if the operation fails, otherwise this is a valid IPC fd.
1040  *
1041  * Side effects:
1042  *      New IPC connection is accepted.
1043  *
1044  *----------------------------------------------------------------------
1045  */
1046 int OS_FcgiIpcAccept(char *clientAddrList)
1047 {
1048     int socket;
1049     union {
1050         struct sockaddr_un un;
1051         struct sockaddr_in in;
1052     } sa;
1053 #if defined __linux__
1054     socklen_t len;
1055 #else
1056     int len;
1057 #endif
1058
1059     while (1) {
1060         if (AcquireLock(TRUE) < 0)
1061             return (-1);
1062
1063         while (1) {
1064             do {
1065                 len = sizeof(sa);
1066                 socket = accept(FCGI_LISTENSOCK_FILENO, (struct sockaddr *) &sa.un, &len);
1067             } while (socket < 0 && errno == EINTR);
1068
1069             if (socket < 0) {
1070                 if (!is_reasonable_accept_errno(errno)) {
1071                     int errnoSave = errno;
1072
1073                     ReleaseLock();
1074                     errno = errnoSave;
1075                     return (-1);
1076                 }
1077                 errno = 0;
1078             }
1079             else {
1080                 int set = 1;
1081
1082                 if (sa.in.sin_family != AF_INET)
1083                     break;
1084
1085 #ifdef TCP_NODELAY
1086                 /* No replies to outgoing data, so disable Nagle */
1087                 setsockopt(socket, IPPROTO_TCP, TCP_NODELAY, (char *)&set, sizeof(set));
1088 #endif
1089
1090                 /* Check that the client IP address is approved */
1091                 if (ClientAddrOK(&sa.in, clientAddrList))
1092                     break;
1093
1094                 close(socket);
1095             }
1096         }  /* while(1) - accept */
1097
1098         if (ReleaseLock() < 0)
1099             return (-1);
1100
1101         if (sa.in.sin_family != AF_UNIX || is_af_unix_keeper(socket))
1102             break;
1103
1104         close(socket);
1105     }  /* while(1) - lock */
1106
1107     return (socket);
1108 }
1109 \f
/*
 *----------------------------------------------------------------------
 *
 * OS_IpcClose --
 *
 *      OS IPC routine to close an IPC connection.  On this platform
 *      the work is delegated entirely to OS_Close().
 *
 * Results:
 *      The value returned by OS_Close() for ipcFd, unchanged.
 *
 * Side effects:
 *      IPC connection is closed.
 *
 *----------------------------------------------------------------------
 */
int OS_IpcClose(int ipcFd)
{
    int status = OS_Close(ipcFd);

    return status;
}
1129
1130 \f
1131 /*
1132  *----------------------------------------------------------------------
1133  *
1134  * OS_IsFcgi --
1135  *
1136  *      Determines whether this process is a FastCGI process or not.
1137  *
1138  * Results:
1139  *      Returns 1 if FastCGI, 0 if not.
1140  *
1141  * Side effects:
1142  *      None.
1143  *
1144  *----------------------------------------------------------------------
1145  */
1146 int OS_IsFcgi()
1147 {
1148         union {
1149         struct sockaddr_in in;
1150         struct sockaddr_un un;
1151     } sa;
1152 #if defined __linux__
1153     socklen_t len = sizeof(sa);
1154 #else
1155     int len = sizeof(sa);
1156 #endif
1157
1158     if (getpeername(FCGI_LISTENSOCK_FILENO, (struct sockaddr *)&sa, &len) != 0
1159             && errno == ENOTCONN)
1160         isFastCGI = TRUE;
1161     else
1162         isFastCGI = FALSE;
1163
1164     return (isFastCGI);
1165 }
1166 \f
/*
 *----------------------------------------------------------------------
 *
 * OS_SetFlags --
 *
 *      Sets selected flag bits in an open file descriptor.  The bits
 *      in flags are OR-ed into the descriptor's current file status
 *      flags; bits that are already set are left alone.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      The file status flags of fd are updated.  If either fcntl()
 *      call fails, the process terminates via exit() with errno as
 *      the exit status.
 *
 *----------------------------------------------------------------------
 */
void OS_SetFlags(int fd, int flags)
{
    int current = fcntl(fd, F_GETFL, 0);

    if (current < 0)
        exit(errno);

    if (fcntl(fd, F_SETFL, current | flags) < 0)
        exit(errno);
}